Command line
python /home/saxelrod/Repo/projects/chemprop/chemprop/train.py --config_path /home/saxelrod/synthetic/free_energy/train/config.json --data_path /home/saxelrod/synthetic/free_energy/train_full.csv --dataset_type regression
Args
{'activation': 'ReLU',
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'batch_size': 50,
 'bias': False,
 'cache_cutoff': 10000,
 'checkpoint_dir': None,
 'checkpoint_path': None,
 'checkpoint_paths': None,
 'class_balance': False,
 'config_path': '/home/saxelrod/synthetic/free_energy/train/config.json',
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'cuda': True,
 'data_path': '/home/saxelrod/synthetic/free_energy/train_full.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'device': device(type='cuda', index=1),
 'dropout': 0.1,
 'ensemble_size': 1,
 'epochs': 30,
 'extra_metrics': [],
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_scaling': False,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'gpu': 1,
 'grad_clip': None,
 'hidden_size': 500,
 'ignore_columns': None,
 'init_lr': 0.0001,
 'log_frequency': 10,
 'max_data_size': None,
 'max_lr': 0.001,
 'metric': 'mae',
 'metrics': ['mae'],
 'minimize_score': True,
 'mpn_shared': False,
 'multiclass_num_classes': 3,
 'no_cache_mol': False,
 'no_cuda': False,
 'no_features_scaling': True,
 'num_folds': 10,
 'num_lrs': 1,
 'num_tasks': 1,
 'num_workers': 8,
 'number_of_molecules': 1,
 'pytorch_seed': 0,
 'quiet': True,
 'save_dir': '/home/saxelrod/synthetic/free_energy/train',
 'save_preds': False,
 'save_smiles_splits': False,
 'seed': 0,
 'separate_test_features_path': None,
 'separate_test_path': '/home/saxelrod/synthetic/free_energy/test_full.csv',
 'separate_val_features_path': None,
 'separate_val_path': '/home/saxelrod/synthetic/free_energy/val_full.csv',
 'show_individual_scores': False,
 'smiles_columns': [None],
 'split_sizes': (0.8, 0.1, 0.1),
 'split_type': 'random',
 'target_columns': None,
 'task_names': ['ensemblefreeenergy'],
 'test': False,
 'test_fold_index': None,
 'train_data_size': None,
 'undirected': False,
 'use_input_features': False,
 'val_fold_index': None,
 'warmup_epochs': 2.0}
Loading data
Number of tasks = 1
Fold 0
Splitting data with seed 0
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 9.4831e-01, PNorm = 47.8611, GNorm = 5.5544, lr_0 = 1.0413e-04
Loss = 9.4296e-01, PNorm = 47.8623, GNorm = 3.7118, lr_0 = 1.0788e-04
Loss = 9.3646e-01, PNorm = 47.8642, GNorm = 7.1054, lr_0 = 1.1163e-04
Loss = 9.0877e-01, PNorm = 47.8656, GNorm = 2.1738, lr_0 = 1.1537e-04
Loss = 9.5442e-01, PNorm = 47.8681, GNorm = 3.2246, lr_0 = 1.1913e-04
Loss = 8.7035e-01, PNorm = 47.8714, GNorm = 2.3724, lr_0 = 1.2287e-04
Loss = 8.4705e-01, PNorm = 47.8762, GNorm = 3.0429, lr_0 = 1.2663e-04
Loss = 7.1660e-01, PNorm = 47.8815, GNorm = 2.2224, lr_0 = 1.3038e-04
Loss = 9.0375e-01, PNorm = 47.8861, GNorm = 9.5235, lr_0 = 1.3413e-04
Loss = 6.9789e-01, PNorm = 47.8919, GNorm = 5.0641, lr_0 = 1.3788e-04
Loss = 6.2205e-01, PNorm = 47.9003, GNorm = 1.6684, lr_0 = 1.4163e-04
Loss = 5.9325e-01, PNorm = 47.9070, GNorm = 6.9083, lr_0 = 1.4537e-04
Loss = 5.0111e-01, PNorm = 47.9136, GNorm = 2.2134, lr_0 = 1.4913e-04
Loss = 5.2770e-01, PNorm = 47.9206, GNorm = 4.6311, lr_0 = 1.5288e-04
Loss = 5.0613e-01, PNorm = 47.9254, GNorm = 12.4506, lr_0 = 1.5662e-04
Loss = 5.4134e-01, PNorm = 47.9294, GNorm = 3.5012, lr_0 = 1.6038e-04
Loss = 4.9165e-01, PNorm = 47.9371, GNorm = 3.0989, lr_0 = 1.6412e-04
Loss = 4.6696e-01, PNorm = 47.9433, GNorm = 15.7761, lr_0 = 1.6788e-04
Loss = 4.1515e-01, PNorm = 47.9480, GNorm = 17.8364, lr_0 = 1.7163e-04
Loss = 4.1962e-01, PNorm = 47.9540, GNorm = 7.0245, lr_0 = 1.7538e-04
Loss = 4.6183e-01, PNorm = 47.9597, GNorm = 4.4221, lr_0 = 1.7913e-04
Loss = 4.7517e-01, PNorm = 47.9658, GNorm = 3.8458, lr_0 = 1.8288e-04
Loss = 4.0485e-01, PNorm = 47.9728, GNorm = 2.0060, lr_0 = 1.8662e-04
Loss = 4.0939e-01, PNorm = 47.9780, GNorm = 10.4929, lr_0 = 1.9038e-04
Loss = 3.7935e-01, PNorm = 47.9816, GNorm = 2.6177, lr_0 = 1.9413e-04
Loss = 3.7641e-01, PNorm = 47.9881, GNorm = 1.8646, lr_0 = 1.9788e-04
Loss = 4.8204e-01, PNorm = 47.9920, GNorm = 2.8039, lr_0 = 2.0163e-04
Loss = 4.7111e-01, PNorm = 47.9976, GNorm = 4.8298, lr_0 = 2.0537e-04
Loss = 4.0122e-01, PNorm = 48.0053, GNorm = 8.1317, lr_0 = 2.0913e-04
Loss = 3.5717e-01, PNorm = 48.0116, GNorm = 2.4750, lr_0 = 2.1288e-04
Loss = 3.7304e-01, PNorm = 48.0187, GNorm = 3.7409, lr_0 = 2.1663e-04
Loss = 3.2838e-01, PNorm = 48.0238, GNorm = 7.3349, lr_0 = 2.2038e-04
Loss = 4.2170e-01, PNorm = 48.0276, GNorm = 19.9122, lr_0 = 2.2412e-04
Loss = 3.5311e-01, PNorm = 48.0326, GNorm = 9.1208, lr_0 = 2.2787e-04
Loss = 3.4016e-01, PNorm = 48.0375, GNorm = 6.7983, lr_0 = 2.3163e-04
Loss = 3.5193e-01, PNorm = 48.0417, GNorm = 5.5133, lr_0 = 2.3538e-04
Loss = 4.0053e-01, PNorm = 48.0497, GNorm = 17.4520, lr_0 = 2.3913e-04
Loss = 4.3378e-01, PNorm = 48.0564, GNorm = 3.5824, lr_0 = 2.4288e-04
Loss = 3.3936e-01, PNorm = 48.0641, GNorm = 21.5229, lr_0 = 2.4662e-04
Loss = 3.7960e-01, PNorm = 48.0685, GNorm = 4.4585, lr_0 = 2.5038e-04
Loss = 3.4238e-01, PNorm = 48.0763, GNorm = 8.2393, lr_0 = 2.5413e-04
Loss = 4.1338e-01, PNorm = 48.0830, GNorm = 8.3243, lr_0 = 2.5788e-04
Loss = 3.6943e-01, PNorm = 48.0896, GNorm = 9.6315, lr_0 = 2.6163e-04
Loss = 3.2856e-01, PNorm = 48.0983, GNorm = 1.3824, lr_0 = 2.6537e-04
Loss = 3.6355e-01, PNorm = 48.1026, GNorm = 8.6347, lr_0 = 2.6912e-04
Loss = 3.4428e-01, PNorm = 48.1093, GNorm = 8.8423, lr_0 = 2.7288e-04
Loss = 3.5851e-01, PNorm = 48.1153, GNorm = 6.7174, lr_0 = 2.7663e-04
Loss = 3.7203e-01, PNorm = 48.1185, GNorm = 2.4454, lr_0 = 2.8038e-04
Loss = 3.6890e-01, PNorm = 48.1270, GNorm = 3.5706, lr_0 = 2.8413e-04
Loss = 3.4510e-01, PNorm = 48.1332, GNorm = 9.3664, lr_0 = 2.8787e-04
Loss = 3.0663e-01, PNorm = 48.1384, GNorm = 5.8406, lr_0 = 2.9163e-04
Loss = 3.1235e-01, PNorm = 48.1459, GNorm = 2.2375, lr_0 = 2.9538e-04
Loss = 3.3989e-01, PNorm = 48.1501, GNorm = 4.8456, lr_0 = 2.9913e-04
Loss = 3.5859e-01, PNorm = 48.1592, GNorm = 2.8153, lr_0 = 3.0288e-04
Loss = 3.1218e-01, PNorm = 48.1674, GNorm = 8.2758, lr_0 = 3.0662e-04
Loss = 2.9273e-01, PNorm = 48.1730, GNorm = 4.9659, lr_0 = 3.1037e-04
Loss = 3.2123e-01, PNorm = 48.1784, GNorm = 4.7481, lr_0 = 3.1413e-04
Loss = 3.8017e-01, PNorm = 48.1830, GNorm = 4.5121, lr_0 = 3.1788e-04
Loss = 3.1537e-01, PNorm = 48.1922, GNorm = 11.4232, lr_0 = 3.2163e-04
Loss = 2.7440e-01, PNorm = 48.2018, GNorm = 2.4438, lr_0 = 3.2538e-04
Loss = 2.9983e-01, PNorm = 48.2107, GNorm = 2.6257, lr_0 = 3.2912e-04
Loss = 3.3086e-01, PNorm = 48.2184, GNorm = 11.4484, lr_0 = 3.3288e-04
Loss = 3.3436e-01, PNorm = 48.2246, GNorm = 7.0048, lr_0 = 3.3663e-04
Loss = 3.1691e-01, PNorm = 48.2347, GNorm = 8.3907, lr_0 = 3.4038e-04
Loss = 3.9939e-01, PNorm = 48.2381, GNorm = 14.2652, lr_0 = 3.4413e-04
Loss = 3.8516e-01, PNorm = 48.2490, GNorm = 4.1045, lr_0 = 3.4787e-04
Loss = 3.2627e-01, PNorm = 48.2623, GNorm = 3.9041, lr_0 = 3.5162e-04
Loss = 3.0511e-01, PNorm = 48.2696, GNorm = 3.2211, lr_0 = 3.5538e-04
Loss = 2.8444e-01, PNorm = 48.2801, GNorm = 9.6482, lr_0 = 3.5913e-04
Loss = 3.7375e-01, PNorm = 48.2879, GNorm = 12.9600, lr_0 = 3.6288e-04
Loss = 3.4074e-01, PNorm = 48.2942, GNorm = 3.5218, lr_0 = 3.6662e-04
Loss = 2.8817e-01, PNorm = 48.3049, GNorm = 1.2928, lr_0 = 3.7037e-04
Loss = 3.2148e-01, PNorm = 48.3149, GNorm = 6.4474, lr_0 = 3.7413e-04
Loss = 3.0301e-01, PNorm = 48.3230, GNorm = 3.1675, lr_0 = 3.7788e-04
Loss = 2.8932e-01, PNorm = 48.3312, GNorm = 6.0728, lr_0 = 3.8163e-04
Loss = 2.8411e-01, PNorm = 48.3407, GNorm = 0.8159, lr_0 = 3.8537e-04
Loss = 2.9715e-01, PNorm = 48.3497, GNorm = 9.2365, lr_0 = 3.8912e-04
Loss = 3.0805e-01, PNorm = 48.3568, GNorm = 10.0161, lr_0 = 3.9287e-04
Loss = 2.9545e-01, PNorm = 48.3612, GNorm = 2.3303, lr_0 = 3.9663e-04
Loss = 3.1081e-01, PNorm = 48.3707, GNorm = 8.9378, lr_0 = 4.0038e-04
Loss = 3.7570e-01, PNorm = 48.3848, GNorm = 13.7118, lr_0 = 4.0413e-04
Loss = 3.5522e-01, PNorm = 48.3955, GNorm = 3.6849, lr_0 = 4.0787e-04
Loss = 3.5389e-01, PNorm = 48.4057, GNorm = 9.1307, lr_0 = 4.1162e-04
Loss = 3.3267e-01, PNorm = 48.4193, GNorm = 2.8892, lr_0 = 4.1537e-04
Loss = 2.5880e-01, PNorm = 48.4331, GNorm = 1.4352, lr_0 = 4.1913e-04
Loss = 3.2858e-01, PNorm = 48.4407, GNorm = 5.1965, lr_0 = 4.2288e-04
Loss = 3.1054e-01, PNorm = 48.4511, GNorm = 2.8431, lr_0 = 4.2662e-04
Loss = 2.7012e-01, PNorm = 48.4604, GNorm = 7.2245, lr_0 = 4.3037e-04
Loss = 3.3167e-01, PNorm = 48.4669, GNorm = 5.3906, lr_0 = 4.3412e-04
Loss = 2.9392e-01, PNorm = 48.4766, GNorm = 9.7627, lr_0 = 4.3788e-04
Loss = 2.7199e-01, PNorm = 48.4901, GNorm = 1.8696, lr_0 = 4.4163e-04
Loss = 2.7396e-01, PNorm = 48.4978, GNorm = 5.3834, lr_0 = 4.4538e-04
Loss = 3.2196e-01, PNorm = 48.5087, GNorm = 1.1910, lr_0 = 4.4912e-04
Loss = 2.9056e-01, PNorm = 48.5159, GNorm = 6.7629, lr_0 = 4.5287e-04
Loss = 2.4655e-01, PNorm = 48.5265, GNorm = 1.8441, lr_0 = 4.5662e-04
Loss = 2.5341e-01, PNorm = 48.5371, GNorm = 1.9136, lr_0 = 4.6038e-04
Loss = 2.8396e-01, PNorm = 48.5429, GNorm = 7.6326, lr_0 = 4.6413e-04
Loss = 2.9865e-01, PNorm = 48.5487, GNorm = 7.1431, lr_0 = 4.6787e-04
Loss = 3.0056e-01, PNorm = 48.5565, GNorm = 3.7115, lr_0 = 4.7162e-04
Loss = 3.0255e-01, PNorm = 48.5661, GNorm = 2.3621, lr_0 = 4.7537e-04
Loss = 2.7805e-01, PNorm = 48.5769, GNorm = 3.2897, lr_0 = 4.7913e-04
Loss = 2.9410e-01, PNorm = 48.5906, GNorm = 4.3045, lr_0 = 4.8288e-04
Loss = 2.7233e-01, PNorm = 48.6001, GNorm = 4.9521, lr_0 = 4.8663e-04
Loss = 2.7565e-01, PNorm = 48.6066, GNorm = 4.2747, lr_0 = 4.9038e-04
Loss = 2.6676e-01, PNorm = 48.6145, GNorm = 1.1113, lr_0 = 4.9412e-04
Loss = 3.3839e-01, PNorm = 48.6269, GNorm = 5.1142, lr_0 = 4.9788e-04
Loss = 3.2169e-01, PNorm = 48.6411, GNorm = 1.9693, lr_0 = 5.0163e-04
Loss = 2.5788e-01, PNorm = 48.6540, GNorm = 1.1112, lr_0 = 5.0538e-04
Loss = 2.9961e-01, PNorm = 48.6673, GNorm = 6.0847, lr_0 = 5.0913e-04
Loss = 2.7917e-01, PNorm = 48.6802, GNorm = 6.3527, lr_0 = 5.1287e-04
Loss = 3.0465e-01, PNorm = 48.6921, GNorm = 4.8996, lr_0 = 5.1663e-04
Loss = 2.4670e-01, PNorm = 48.7057, GNorm = 2.3299, lr_0 = 5.2038e-04
Loss = 2.8952e-01, PNorm = 48.7176, GNorm = 2.4998, lr_0 = 5.2413e-04
Loss = 2.2409e-01, PNorm = 48.7250, GNorm = 2.3444, lr_0 = 5.2788e-04
Loss = 2.5796e-01, PNorm = 48.7347, GNorm = 1.2419, lr_0 = 5.3162e-04
Loss = 2.8069e-01, PNorm = 48.7432, GNorm = 4.3401, lr_0 = 5.3538e-04
Loss = 2.9192e-01, PNorm = 48.7535, GNorm = 1.2044, lr_0 = 5.3912e-04
Loss = 3.0852e-01, PNorm = 48.7660, GNorm = 2.6708, lr_0 = 5.4288e-04
Loss = 3.1132e-01, PNorm = 48.7760, GNorm = 1.1977, lr_0 = 5.4663e-04
Loss = 2.6321e-01, PNorm = 48.7874, GNorm = 3.6927, lr_0 = 5.5038e-04
Validation mae = 0.310297
Epoch 1
Loss = 2.4079e-01, PNorm = 48.7949, GNorm = 2.0643, lr_0 = 5.5413e-04
Loss = 2.6610e-01, PNorm = 48.8059, GNorm = 5.1775, lr_0 = 5.5787e-04
Loss = 2.8531e-01, PNorm = 48.8180, GNorm = 3.3511, lr_0 = 5.6163e-04
Loss = 2.6205e-01, PNorm = 48.8305, GNorm = 0.8892, lr_0 = 5.6538e-04
Loss = 2.7553e-01, PNorm = 48.8443, GNorm = 1.2857, lr_0 = 5.6913e-04
Loss = 2.5812e-01, PNorm = 48.8575, GNorm = 3.7900, lr_0 = 5.7288e-04
Loss = 2.5279e-01, PNorm = 48.8657, GNorm = 4.2807, lr_0 = 5.7662e-04
Loss = 2.8122e-01, PNorm = 48.8780, GNorm = 3.9683, lr_0 = 5.8038e-04
Loss = 2.7137e-01, PNorm = 48.8890, GNorm = 2.4662, lr_0 = 5.8413e-04
Loss = 3.0105e-01, PNorm = 48.9022, GNorm = 2.3747, lr_0 = 5.8788e-04
Loss = 2.3998e-01, PNorm = 48.9158, GNorm = 5.2029, lr_0 = 5.9163e-04
Loss = 2.6871e-01, PNorm = 48.9273, GNorm = 0.7606, lr_0 = 5.9538e-04
Loss = 2.7534e-01, PNorm = 48.9337, GNorm = 1.2702, lr_0 = 5.9913e-04
Loss = 2.7000e-01, PNorm = 48.9530, GNorm = 3.1035, lr_0 = 6.0288e-04
Loss = 2.9934e-01, PNorm = 48.9707, GNorm = 3.1002, lr_0 = 6.0663e-04
Loss = 2.9782e-01, PNorm = 48.9865, GNorm = 5.2883, lr_0 = 6.1038e-04
Loss = 2.6629e-01, PNorm = 48.9983, GNorm = 2.4158, lr_0 = 6.1413e-04
Loss = 2.6187e-01, PNorm = 49.0125, GNorm = 3.1195, lr_0 = 6.1788e-04
Loss = 2.6098e-01, PNorm = 49.0272, GNorm = 0.7837, lr_0 = 6.2163e-04
Loss = 2.6493e-01, PNorm = 49.0368, GNorm = 3.7047, lr_0 = 6.2538e-04
Loss = 3.0384e-01, PNorm = 49.0533, GNorm = 6.2618, lr_0 = 6.2913e-04
Loss = 2.4695e-01, PNorm = 49.0726, GNorm = 4.2026, lr_0 = 6.3288e-04
Loss = 3.0860e-01, PNorm = 49.0857, GNorm = 2.8815, lr_0 = 6.3663e-04
Loss = 2.9593e-01, PNorm = 49.1050, GNorm = 4.8452, lr_0 = 6.4038e-04
Loss = 2.7535e-01, PNorm = 49.1190, GNorm = 2.5529, lr_0 = 6.4413e-04
Loss = 2.6552e-01, PNorm = 49.1329, GNorm = 1.6362, lr_0 = 6.4788e-04
Loss = 2.7425e-01, PNorm = 49.1443, GNorm = 0.9529, lr_0 = 6.5163e-04
Loss = 2.7575e-01, PNorm = 49.1618, GNorm = 4.3517, lr_0 = 6.5538e-04
Loss = 2.6347e-01, PNorm = 49.1819, GNorm = 1.2923, lr_0 = 6.5913e-04
Loss = 2.7514e-01, PNorm = 49.1884, GNorm = 3.8438, lr_0 = 6.6288e-04
Loss = 2.5328e-01, PNorm = 49.2050, GNorm = 3.4957, lr_0 = 6.6663e-04
Loss = 2.5914e-01, PNorm = 49.2174, GNorm = 1.4211, lr_0 = 6.7038e-04
Loss = 2.3820e-01, PNorm = 49.2351, GNorm = 3.7384, lr_0 = 6.7413e-04
Loss = 2.7884e-01, PNorm = 49.2466, GNorm = 5.6286, lr_0 = 6.7788e-04
Loss = 2.7735e-01, PNorm = 49.2644, GNorm = 6.6189, lr_0 = 6.8163e-04
Loss = 3.0813e-01, PNorm = 49.2913, GNorm = 2.6212, lr_0 = 6.8538e-04
Loss = 2.7712e-01, PNorm = 49.3209, GNorm = 1.4787, lr_0 = 6.8913e-04
Loss = 2.6361e-01, PNorm = 49.3439, GNorm = 2.0668, lr_0 = 6.9288e-04
Loss = 2.4382e-01, PNorm = 49.3578, GNorm = 3.8399, lr_0 = 6.9663e-04
Loss = 2.5976e-01, PNorm = 49.3675, GNorm = 1.0128, lr_0 = 7.0038e-04
Loss = 2.7372e-01, PNorm = 49.3819, GNorm = 1.3765, lr_0 = 7.0413e-04
Loss = 2.4641e-01, PNorm = 49.3903, GNorm = 3.6748, lr_0 = 7.0788e-04
Loss = 3.0137e-01, PNorm = 49.4020, GNorm = 6.6069, lr_0 = 7.1163e-04
Loss = 2.8029e-01, PNorm = 49.4238, GNorm = 1.8937, lr_0 = 7.1538e-04
Loss = 2.6344e-01, PNorm = 49.4461, GNorm = 2.3650, lr_0 = 7.1913e-04
Loss = 2.5118e-01, PNorm = 49.4685, GNorm = 5.8453, lr_0 = 7.2288e-04
Loss = 2.9513e-01, PNorm = 49.4863, GNorm = 6.6558, lr_0 = 7.2663e-04
Loss = 2.7301e-01, PNorm = 49.5080, GNorm = 4.1598, lr_0 = 7.3038e-04
Loss = 2.6576e-01, PNorm = 49.5321, GNorm = 5.3148, lr_0 = 7.3413e-04
Loss = 2.8704e-01, PNorm = 49.5520, GNorm = 1.9170, lr_0 = 7.3788e-04
Loss = 2.6744e-01, PNorm = 49.5797, GNorm = 3.9471, lr_0 = 7.4163e-04
Loss = 2.5674e-01, PNorm = 49.5923, GNorm = 2.7907, lr_0 = 7.4538e-04
Loss = 2.5818e-01, PNorm = 49.6065, GNorm = 2.1806, lr_0 = 7.4913e-04
Loss = 2.2261e-01, PNorm = 49.6243, GNorm = 4.1598, lr_0 = 7.5288e-04
Loss = 2.0857e-01, PNorm = 49.6376, GNorm = 2.2535, lr_0 = 7.5663e-04
Loss = 2.3430e-01, PNorm = 49.6541, GNorm = 0.8053, lr_0 = 7.6038e-04
Loss = 2.9352e-01, PNorm = 49.6646, GNorm = 9.2553, lr_0 = 7.6413e-04
Loss = 3.4110e-01, PNorm = 49.6792, GNorm = 0.8652, lr_0 = 7.6788e-04
Loss = 2.9694e-01, PNorm = 49.7046, GNorm = 4.1122, lr_0 = 7.7163e-04
Loss = 2.5987e-01, PNorm = 49.7287, GNorm = 2.2578, lr_0 = 7.7538e-04
Loss = 2.6877e-01, PNorm = 49.7370, GNorm = 2.8890, lr_0 = 7.7913e-04
Loss = 2.6821e-01, PNorm = 49.7517, GNorm = 2.5160, lr_0 = 7.8288e-04
Loss = 2.7619e-01, PNorm = 49.7760, GNorm = 0.8476, lr_0 = 7.8663e-04
Loss = 2.5876e-01, PNorm = 49.7942, GNorm = 1.1121, lr_0 = 7.9038e-04
Loss = 2.3988e-01, PNorm = 49.8153, GNorm = 2.4023, lr_0 = 7.9413e-04
Loss = 2.9479e-01, PNorm = 49.8396, GNorm = 3.5698, lr_0 = 7.9788e-04
Loss = 2.7522e-01, PNorm = 49.8614, GNorm = 1.7581, lr_0 = 8.0163e-04
Loss = 2.4552e-01, PNorm = 49.8792, GNorm = 2.1828, lr_0 = 8.0538e-04
Loss = 2.7158e-01, PNorm = 49.9070, GNorm = 1.6922, lr_0 = 8.0913e-04
Loss = 2.6707e-01, PNorm = 49.9293, GNorm = 2.2957, lr_0 = 8.1288e-04
Loss = 2.3227e-01, PNorm = 49.9495, GNorm = 2.6900, lr_0 = 8.1663e-04
Loss = 2.1096e-01, PNorm = 49.9692, GNorm = 1.4215, lr_0 = 8.2038e-04
Loss = 2.6604e-01, PNorm = 49.9833, GNorm = 1.8097, lr_0 = 8.2413e-04
Loss = 2.2986e-01, PNorm = 49.9974, GNorm = 1.6244, lr_0 = 8.2788e-04
Loss = 2.4256e-01, PNorm = 50.0218, GNorm = 5.0037, lr_0 = 8.3163e-04
Loss = 2.3336e-01, PNorm = 50.0330, GNorm = 3.6477, lr_0 = 8.3538e-04
Loss = 2.2267e-01, PNorm = 50.0524, GNorm = 1.3315, lr_0 = 8.3913e-04
Loss = 2.2711e-01, PNorm = 50.0663, GNorm = 0.7889, lr_0 = 8.4288e-04
Loss = 2.6877e-01, PNorm = 50.0868, GNorm = 6.4287, lr_0 = 8.4663e-04
Loss = 2.5907e-01, PNorm = 50.1011, GNorm = 1.3638, lr_0 = 8.5038e-04
Loss = 2.8201e-01, PNorm = 50.1178, GNorm = 6.2135, lr_0 = 8.5413e-04
Loss = 2.5828e-01, PNorm = 50.1378, GNorm = 0.7104, lr_0 = 8.5788e-04
Loss = 2.8391e-01, PNorm = 50.1628, GNorm = 3.0321, lr_0 = 8.6163e-04
Loss = 2.7351e-01, PNorm = 50.1788, GNorm = 3.8885, lr_0 = 8.6538e-04
Loss = 2.9448e-01, PNorm = 50.2080, GNorm = 3.8437, lr_0 = 8.6913e-04
Loss = 3.0714e-01, PNorm = 50.2446, GNorm = 4.7253, lr_0 = 8.7288e-04
Loss = 2.4290e-01, PNorm = 50.2707, GNorm = 1.0297, lr_0 = 8.7663e-04
Loss = 2.3610e-01, PNorm = 50.2955, GNorm = 2.9079, lr_0 = 8.8038e-04
Loss = 3.0205e-01, PNorm = 50.3185, GNorm = 2.1855, lr_0 = 8.8413e-04
Loss = 2.7444e-01, PNorm = 50.3456, GNorm = 5.9891, lr_0 = 8.8788e-04
Loss = 2.7635e-01, PNorm = 50.3680, GNorm = 1.9180, lr_0 = 8.9163e-04
Loss = 3.1588e-01, PNorm = 50.3975, GNorm = 1.1824, lr_0 = 8.9538e-04
Loss = 2.3865e-01, PNorm = 50.4265, GNorm = 1.2333, lr_0 = 8.9913e-04
Loss = 2.7803e-01, PNorm = 50.4449, GNorm = 1.3929, lr_0 = 9.0288e-04
Loss = 2.7369e-01, PNorm = 50.4665, GNorm = 2.4718, lr_0 = 9.0663e-04
Loss = 2.7419e-01, PNorm = 50.4849, GNorm = 2.5915, lr_0 = 9.1038e-04
Loss = 2.9189e-01, PNorm = 50.5162, GNorm = 2.8935, lr_0 = 9.1413e-04
Loss = 2.3581e-01, PNorm = 50.5368, GNorm = 2.1554, lr_0 = 9.1788e-04
Loss = 2.6724e-01, PNorm = 50.5635, GNorm = 1.4515, lr_0 = 9.2163e-04
Loss = 2.2074e-01, PNorm = 50.5755, GNorm = 0.9776, lr_0 = 9.2538e-04
Loss = 2.6207e-01, PNorm = 50.6025, GNorm = 3.6476, lr_0 = 9.2913e-04
Loss = 2.5973e-01, PNorm = 50.6274, GNorm = 0.9615, lr_0 = 9.3288e-04
Loss = 2.4936e-01, PNorm = 50.6660, GNorm = 3.3143, lr_0 = 9.3663e-04
Loss = 2.6923e-01, PNorm = 50.6944, GNorm = 0.8235, lr_0 = 9.4038e-04
Loss = 2.2396e-01, PNorm = 50.7181, GNorm = 2.1596, lr_0 = 9.4413e-04
Loss = 2.3513e-01, PNorm = 50.7410, GNorm = 2.5427, lr_0 = 9.4788e-04
Loss = 2.5501e-01, PNorm = 50.7643, GNorm = 1.9117, lr_0 = 9.5163e-04
Loss = 2.2008e-01, PNorm = 50.7853, GNorm = 2.4747, lr_0 = 9.5538e-04
Loss = 2.7778e-01, PNorm = 50.8099, GNorm = 4.9168, lr_0 = 9.5913e-04
Loss = 3.2403e-01, PNorm = 50.8338, GNorm = 5.0428, lr_0 = 9.6288e-04
Loss = 3.7105e-01, PNorm = 50.8827, GNorm = 3.9483, lr_0 = 9.6663e-04
Loss = 2.6567e-01, PNorm = 50.9256, GNorm = 1.3619, lr_0 = 9.7038e-04
Loss = 2.3800e-01, PNorm = 50.9628, GNorm = 1.4461, lr_0 = 9.7413e-04
Loss = 2.4771e-01, PNorm = 50.9909, GNorm = 0.9543, lr_0 = 9.7788e-04
Loss = 2.4982e-01, PNorm = 51.0168, GNorm = 1.7299, lr_0 = 9.8163e-04
Loss = 2.3384e-01, PNorm = 51.0404, GNorm = 0.9546, lr_0 = 9.8537e-04
Loss = 2.9000e-01, PNorm = 51.0649, GNorm = 1.1389, lr_0 = 9.8912e-04
Loss = 2.4054e-01, PNorm = 51.0928, GNorm = 1.8158, lr_0 = 9.9288e-04
Loss = 2.3550e-01, PNorm = 51.1215, GNorm = 2.3449, lr_0 = 9.9663e-04
Loss = 2.5479e-01, PNorm = 51.1459, GNorm = 4.3755, lr_0 = 9.9993e-04
Validation mae = 0.291182
Epoch 2
Loss = 2.5379e-01, PNorm = 51.1780, GNorm = 1.1287, lr_0 = 9.9925e-04
Loss = 2.7520e-01, PNorm = 51.2156, GNorm = 2.9016, lr_0 = 9.9856e-04
Loss = 2.2306e-01, PNorm = 51.2448, GNorm = 1.3452, lr_0 = 9.9788e-04
Loss = 2.4525e-01, PNorm = 51.2796, GNorm = 4.4651, lr_0 = 9.9719e-04
Loss = 2.4220e-01, PNorm = 51.2961, GNorm = 1.3920, lr_0 = 9.9651e-04
Loss = 2.4687e-01, PNorm = 51.3181, GNorm = 4.4396, lr_0 = 9.9583e-04
Loss = 2.6421e-01, PNorm = 51.3500, GNorm = 1.6137, lr_0 = 9.9515e-04
Loss = 2.6849e-01, PNorm = 51.3777, GNorm = 2.4120, lr_0 = 9.9446e-04
Loss = 2.6305e-01, PNorm = 51.4123, GNorm = 2.7223, lr_0 = 9.9378e-04
Loss = 2.4728e-01, PNorm = 51.4350, GNorm = 2.1747, lr_0 = 9.9310e-04
Loss = 2.6876e-01, PNorm = 51.4683, GNorm = 1.4055, lr_0 = 9.9242e-04
Loss = 2.6191e-01, PNorm = 51.5112, GNorm = 3.1880, lr_0 = 9.9174e-04
Loss = 2.3789e-01, PNorm = 51.5323, GNorm = 2.9657, lr_0 = 9.9106e-04
Loss = 2.2377e-01, PNorm = 51.5635, GNorm = 1.7768, lr_0 = 9.9038e-04
Loss = 2.2516e-01, PNorm = 51.5984, GNorm = 0.6380, lr_0 = 9.8971e-04
Loss = 2.5338e-01, PNorm = 51.6333, GNorm = 3.6112, lr_0 = 9.8903e-04
Loss = 2.4853e-01, PNorm = 51.6658, GNorm = 0.9639, lr_0 = 9.8835e-04
Loss = 2.5815e-01, PNorm = 51.6830, GNorm = 1.3996, lr_0 = 9.8767e-04
Loss = 2.3407e-01, PNorm = 51.7138, GNorm = 4.7401, lr_0 = 9.8700e-04
Loss = 2.3831e-01, PNorm = 51.7532, GNorm = 0.7400, lr_0 = 9.8632e-04
Loss = 2.4147e-01, PNorm = 51.7828, GNorm = 2.8845, lr_0 = 9.8564e-04
Loss = 2.4917e-01, PNorm = 51.8145, GNorm = 1.5410, lr_0 = 9.8497e-04
Loss = 2.4199e-01, PNorm = 51.8385, GNorm = 0.9426, lr_0 = 9.8429e-04
Loss = 2.3005e-01, PNorm = 51.8536, GNorm = 1.9189, lr_0 = 9.8362e-04
Loss = 2.3422e-01, PNorm = 51.8847, GNorm = 2.3053, lr_0 = 9.8295e-04
Loss = 2.5098e-01, PNorm = 51.9075, GNorm = 1.5289, lr_0 = 9.8227e-04
Loss = 2.3275e-01, PNorm = 51.9278, GNorm = 1.0385, lr_0 = 9.8160e-04
Loss = 2.4116e-01, PNorm = 51.9509, GNorm = 1.6391, lr_0 = 9.8093e-04
Loss = 2.3267e-01, PNorm = 51.9711, GNorm = 0.7234, lr_0 = 9.8026e-04
Loss = 2.6152e-01, PNorm = 52.0030, GNorm = 2.8648, lr_0 = 9.7958e-04
Loss = 2.7455e-01, PNorm = 52.0217, GNorm = 0.9612, lr_0 = 9.7891e-04
Loss = 2.5963e-01, PNorm = 52.0574, GNorm = 5.2579, lr_0 = 9.7824e-04
Loss = 2.5304e-01, PNorm = 52.0865, GNorm = 1.9242, lr_0 = 9.7757e-04
Loss = 2.4091e-01, PNorm = 52.1135, GNorm = 2.2617, lr_0 = 9.7690e-04
Loss = 2.3433e-01, PNorm = 52.1418, GNorm = 1.3961, lr_0 = 9.7623e-04
Loss = 2.2806e-01, PNorm = 52.1727, GNorm = 0.9941, lr_0 = 9.7556e-04
Loss = 2.4853e-01, PNorm = 52.1855, GNorm = 2.2034, lr_0 = 9.7490e-04
Loss = 2.6083e-01, PNorm = 52.2094, GNorm = 1.0875, lr_0 = 9.7423e-04
Loss = 2.2838e-01, PNorm = 52.2347, GNorm = 2.7049, lr_0 = 9.7356e-04
Loss = 2.3807e-01, PNorm = 52.2728, GNorm = 2.9110, lr_0 = 9.7289e-04
Loss = 2.4295e-01, PNorm = 52.3026, GNorm = 1.3466, lr_0 = 9.7223e-04
Loss = 2.0261e-01, PNorm = 52.3334, GNorm = 0.6634, lr_0 = 9.7156e-04
Loss = 1.9583e-01, PNorm = 52.3552, GNorm = 1.6924, lr_0 = 9.7090e-04
Loss = 2.2495e-01, PNorm = 52.3763, GNorm = 1.3273, lr_0 = 9.7023e-04
Loss = 2.4239e-01, PNorm = 52.4068, GNorm = 1.1836, lr_0 = 9.6957e-04
Loss = 2.5333e-01, PNorm = 52.4233, GNorm = 1.5274, lr_0 = 9.6890e-04
Loss = 2.5274e-01, PNorm = 52.4568, GNorm = 0.7204, lr_0 = 9.6824e-04
Loss = 2.1445e-01, PNorm = 52.4796, GNorm = 1.5246, lr_0 = 9.6757e-04
Loss = 2.1568e-01, PNorm = 52.5055, GNorm = 3.0578, lr_0 = 9.6691e-04
Loss = 2.0057e-01, PNorm = 52.5267, GNorm = 1.3286, lr_0 = 9.6625e-04
Loss = 2.2720e-01, PNorm = 52.5514, GNorm = 1.9737, lr_0 = 9.6559e-04
Loss = 2.3597e-01, PNorm = 52.5664, GNorm = 1.9136, lr_0 = 9.6493e-04
Loss = 2.5670e-01, PNorm = 52.5881, GNorm = 1.1771, lr_0 = 9.6427e-04
Loss = 2.1709e-01, PNorm = 52.6167, GNorm = 1.7855, lr_0 = 9.6360e-04
Loss = 2.3800e-01, PNorm = 52.6361, GNorm = 1.4780, lr_0 = 9.6294e-04
Loss = 2.3751e-01, PNorm = 52.6663, GNorm = 1.0373, lr_0 = 9.6228e-04
Loss = 2.6389e-01, PNorm = 52.6978, GNorm = 0.7776, lr_0 = 9.6163e-04
Loss = 2.2139e-01, PNorm = 52.7250, GNorm = 1.7450, lr_0 = 9.6097e-04
Loss = 2.2973e-01, PNorm = 52.7540, GNorm = 1.3241, lr_0 = 9.6031e-04
Loss = 2.8487e-01, PNorm = 52.7682, GNorm = 0.9250, lr_0 = 9.5965e-04
Loss = 2.2815e-01, PNorm = 52.7979, GNorm = 1.0246, lr_0 = 9.5899e-04
Loss = 2.2396e-01, PNorm = 52.8279, GNorm = 0.7269, lr_0 = 9.5834e-04
Loss = 2.0641e-01, PNorm = 52.8537, GNorm = 3.1733, lr_0 = 9.5768e-04
Loss = 2.0103e-01, PNorm = 52.8808, GNorm = 1.1746, lr_0 = 9.5702e-04
Loss = 1.8467e-01, PNorm = 52.9012, GNorm = 1.9943, lr_0 = 9.5637e-04
Loss = 2.3854e-01, PNorm = 52.9246, GNorm = 1.0514, lr_0 = 9.5571e-04
Loss = 2.3237e-01, PNorm = 52.9571, GNorm = 0.6644, lr_0 = 9.5506e-04
Loss = 2.0169e-01, PNorm = 52.9912, GNorm = 1.5214, lr_0 = 9.5440e-04
Loss = 2.4840e-01, PNorm = 53.0135, GNorm = 1.1758, lr_0 = 9.5375e-04
Loss = 2.0690e-01, PNorm = 53.0386, GNorm = 2.5558, lr_0 = 9.5310e-04
Loss = 2.2352e-01, PNorm = 53.0684, GNorm = 1.0846, lr_0 = 9.5244e-04
Loss = 2.2324e-01, PNorm = 53.0970, GNorm = 1.6005, lr_0 = 9.5179e-04
Loss = 2.4022e-01, PNorm = 53.1064, GNorm = 1.6954, lr_0 = 9.5114e-04
Loss = 2.5417e-01, PNorm = 53.1265, GNorm = 3.3155, lr_0 = 9.5049e-04
Loss = 2.3421e-01, PNorm = 53.1521, GNorm = 1.2190, lr_0 = 9.4984e-04
Loss = 1.9817e-01, PNorm = 53.1737, GNorm = 1.8278, lr_0 = 9.4919e-04
Loss = 2.0451e-01, PNorm = 53.1968, GNorm = 3.1304, lr_0 = 9.4854e-04
Loss = 2.3924e-01, PNorm = 53.2233, GNorm = 0.7473, lr_0 = 9.4789e-04
Loss = 2.1258e-01, PNorm = 53.2431, GNorm = 1.0864, lr_0 = 9.4724e-04
Loss = 2.1284e-01, PNorm = 53.2692, GNorm = 2.0458, lr_0 = 9.4659e-04
Loss = 2.3325e-01, PNorm = 53.2986, GNorm = 2.7340, lr_0 = 9.4594e-04
Loss = 2.2912e-01, PNorm = 53.3290, GNorm = 2.6681, lr_0 = 9.4529e-04
Loss = 2.3661e-01, PNorm = 53.3566, GNorm = 1.9465, lr_0 = 9.4464e-04
Loss = 2.4229e-01, PNorm = 53.3788, GNorm = 2.1450, lr_0 = 9.4400e-04
Loss = 2.2093e-01, PNorm = 53.4033, GNorm = 1.8706, lr_0 = 9.4335e-04
Loss = 2.5849e-01, PNorm = 53.4336, GNorm = 1.8853, lr_0 = 9.4270e-04
Loss = 2.3265e-01, PNorm = 53.4626, GNorm = 1.2018, lr_0 = 9.4206e-04
Loss = 2.3497e-01, PNorm = 53.4965, GNorm = 2.8577, lr_0 = 9.4141e-04
Loss = 2.5100e-01, PNorm = 53.5227, GNorm = 1.1670, lr_0 = 9.4077e-04
Loss = 2.2663e-01, PNorm = 53.5494, GNorm = 2.8429, lr_0 = 9.4012e-04
Loss = 2.6220e-01, PNorm = 53.5776, GNorm = 1.0724, lr_0 = 9.3948e-04
Loss = 2.6216e-01, PNorm = 53.6117, GNorm = 1.0269, lr_0 = 9.3884e-04
Loss = 2.4777e-01, PNorm = 53.6483, GNorm = 1.1246, lr_0 = 9.3819e-04
Loss = 2.2368e-01, PNorm = 53.6691, GNorm = 1.0014, lr_0 = 9.3755e-04
Loss = 2.0397e-01, PNorm = 53.6933, GNorm = 1.0635, lr_0 = 9.3691e-04
Loss = 2.1123e-01, PNorm = 53.7070, GNorm = 2.6231, lr_0 = 9.3627e-04
Loss = 2.4402e-01, PNorm = 53.7333, GNorm = 2.0951, lr_0 = 9.3562e-04
Loss = 2.0730e-01, PNorm = 53.7559, GNorm = 0.4406, lr_0 = 9.3498e-04
Loss = 2.1341e-01, PNorm = 53.7876, GNorm = 0.8002, lr_0 = 9.3434e-04
Loss = 2.1898e-01, PNorm = 53.8168, GNorm = 1.6008, lr_0 = 9.3370e-04
Loss = 2.2869e-01, PNorm = 53.8478, GNorm = 2.8945, lr_0 = 9.3306e-04
Loss = 2.4194e-01, PNorm = 53.8721, GNorm = 2.1619, lr_0 = 9.3242e-04
Loss = 2.6528e-01, PNorm = 53.9030, GNorm = 3.9560, lr_0 = 9.3178e-04
Loss = 2.7960e-01, PNorm = 53.9513, GNorm = 2.0022, lr_0 = 9.3115e-04
Loss = 1.9585e-01, PNorm = 53.9847, GNorm = 2.1818, lr_0 = 9.3051e-04
Loss = 2.2208e-01, PNorm = 54.0151, GNorm = 2.4259, lr_0 = 9.2987e-04
Loss = 2.3464e-01, PNorm = 54.0402, GNorm = 1.8890, lr_0 = 9.2923e-04
Loss = 2.3543e-01, PNorm = 54.0739, GNorm = 1.7447, lr_0 = 9.2860e-04
Loss = 2.1849e-01, PNorm = 54.0924, GNorm = 0.5991, lr_0 = 9.2796e-04
Loss = 1.8532e-01, PNorm = 54.1173, GNorm = 1.3415, lr_0 = 9.2733e-04
Loss = 2.2658e-01, PNorm = 54.1368, GNorm = 3.2005, lr_0 = 9.2669e-04
Loss = 2.2391e-01, PNorm = 54.1605, GNorm = 0.9695, lr_0 = 9.2606e-04
Loss = 2.0267e-01, PNorm = 54.1793, GNorm = 0.7223, lr_0 = 9.2542e-04
Loss = 2.0235e-01, PNorm = 54.2040, GNorm = 0.9682, lr_0 = 9.2479e-04
Loss = 2.2187e-01, PNorm = 54.2251, GNorm = 1.8341, lr_0 = 9.2415e-04
Loss = 1.9671e-01, PNorm = 54.2534, GNorm = 1.0364, lr_0 = 9.2352e-04
Loss = 2.3613e-01, PNorm = 54.2785, GNorm = 1.7910, lr_0 = 9.2289e-04
Loss = 2.2558e-01, PNorm = 54.3088, GNorm = 0.8663, lr_0 = 9.2226e-04
Loss = 2.0343e-01, PNorm = 54.3307, GNorm = 1.6348, lr_0 = 9.2162e-04
Loss = 1.9037e-01, PNorm = 54.3574, GNorm = 2.9177, lr_0 = 9.2099e-04
Validation mae = 0.280871
Epoch 3
Loss = 2.2368e-01, PNorm = 54.3686, GNorm = 0.9775, lr_0 = 9.2036e-04
Loss = 2.2310e-01, PNorm = 54.3957, GNorm = 1.8972, lr_0 = 9.1973e-04
Loss = 2.0514e-01, PNorm = 54.4121, GNorm = 1.5084, lr_0 = 9.1910e-04
Loss = 2.1215e-01, PNorm = 54.4413, GNorm = 2.6606, lr_0 = 9.1847e-04
Loss = 2.0084e-01, PNorm = 54.4602, GNorm = 0.8860, lr_0 = 9.1784e-04
Loss = 1.9244e-01, PNorm = 54.4814, GNorm = 2.1917, lr_0 = 9.1721e-04
Loss = 2.1487e-01, PNorm = 54.5096, GNorm = 1.5287, lr_0 = 9.1658e-04
Loss = 2.1889e-01, PNorm = 54.5425, GNorm = 0.7491, lr_0 = 9.1596e-04
Loss = 2.0751e-01, PNorm = 54.5643, GNorm = 0.8506, lr_0 = 9.1533e-04
Loss = 1.8412e-01, PNorm = 54.5934, GNorm = 1.2756, lr_0 = 9.1470e-04
Loss = 2.0276e-01, PNorm = 54.6140, GNorm = 1.0037, lr_0 = 9.1408e-04
Loss = 1.9452e-01, PNorm = 54.6392, GNorm = 0.7597, lr_0 = 9.1345e-04
Loss = 1.8966e-01, PNorm = 54.6701, GNorm = 1.6425, lr_0 = 9.1282e-04
Loss = 1.8800e-01, PNorm = 54.6948, GNorm = 2.0235, lr_0 = 9.1220e-04
Loss = 1.9591e-01, PNorm = 54.7318, GNorm = 1.9046, lr_0 = 9.1157e-04
Loss = 2.1382e-01, PNorm = 54.7574, GNorm = 1.6263, lr_0 = 9.1095e-04
Loss = 2.0467e-01, PNorm = 54.7860, GNorm = 1.5278, lr_0 = 9.1032e-04
Loss = 1.8458e-01, PNorm = 54.8151, GNorm = 0.6449, lr_0 = 9.0970e-04
Loss = 2.0393e-01, PNorm = 54.8419, GNorm = 1.5051, lr_0 = 9.0908e-04
Loss = 2.0922e-01, PNorm = 54.8725, GNorm = 0.9282, lr_0 = 9.0846e-04
Loss = 2.0149e-01, PNorm = 54.8885, GNorm = 1.1799, lr_0 = 9.0783e-04
Loss = 1.9670e-01, PNorm = 54.9216, GNorm = 0.9058, lr_0 = 9.0721e-04
Loss = 1.9854e-01, PNorm = 54.9413, GNorm = 1.4449, lr_0 = 9.0659e-04
Loss = 2.1657e-01, PNorm = 54.9715, GNorm = 1.6676, lr_0 = 9.0597e-04
Loss = 2.2053e-01, PNorm = 54.9901, GNorm = 3.2006, lr_0 = 9.0535e-04
Loss = 2.1341e-01, PNorm = 55.0117, GNorm = 1.4915, lr_0 = 9.0473e-04
Loss = 2.3539e-01, PNorm = 55.0298, GNorm = 0.7518, lr_0 = 9.0411e-04
Loss = 2.0133e-01, PNorm = 55.0459, GNorm = 0.8836, lr_0 = 9.0349e-04
Loss = 2.2864e-01, PNorm = 55.0690, GNorm = 0.8224, lr_0 = 9.0287e-04
Loss = 2.1373e-01, PNorm = 55.0910, GNorm = 1.6466, lr_0 = 9.0225e-04
Loss = 2.1845e-01, PNorm = 55.1263, GNorm = 1.4887, lr_0 = 9.0163e-04
Loss = 1.8399e-01, PNorm = 55.1441, GNorm = 1.1404, lr_0 = 9.0102e-04
Loss = 2.2201e-01, PNorm = 55.1699, GNorm = 0.9074, lr_0 = 9.0040e-04
Loss = 2.2265e-01, PNorm = 55.1936, GNorm = 2.1133, lr_0 = 8.9978e-04
Loss = 2.2856e-01, PNorm = 55.2378, GNorm = 0.7185, lr_0 = 8.9916e-04
Loss = 1.9319e-01, PNorm = 55.2642, GNorm = 2.1135, lr_0 = 8.9855e-04
Loss = 2.0440e-01, PNorm = 55.3025, GNorm = 0.6489, lr_0 = 8.9793e-04
Loss = 1.9024e-01, PNorm = 55.3260, GNorm = 2.8486, lr_0 = 8.9732e-04
Loss = 1.9525e-01, PNorm = 55.3434, GNorm = 0.7755, lr_0 = 8.9670e-04
Loss = 2.2131e-01, PNorm = 55.3621, GNorm = 2.5028, lr_0 = 8.9609e-04
Loss = 1.7396e-01, PNorm = 55.3911, GNorm = 1.1142, lr_0 = 8.9548e-04
Loss = 2.0191e-01, PNorm = 55.4126, GNorm = 1.3011, lr_0 = 8.9486e-04
Loss = 1.7733e-01, PNorm = 55.4414, GNorm = 1.5442, lr_0 = 8.9425e-04
Loss = 2.1748e-01, PNorm = 55.4597, GNorm = 0.8884, lr_0 = 8.9364e-04
Loss = 1.7717e-01, PNorm = 55.4869, GNorm = 1.0138, lr_0 = 8.9302e-04
Loss = 1.8664e-01, PNorm = 55.5058, GNorm = 1.5990, lr_0 = 8.9241e-04
Loss = 2.0097e-01, PNorm = 55.5273, GNorm = 1.2673, lr_0 = 8.9180e-04
Loss = 2.1160e-01, PNorm = 55.5559, GNorm = 1.6926, lr_0 = 8.9119e-04
Loss = 2.0641e-01, PNorm = 55.5865, GNorm = 0.7867, lr_0 = 8.9058e-04
Loss = 2.1708e-01, PNorm = 55.6205, GNorm = 0.9234, lr_0 = 8.8997e-04
Loss = 1.9401e-01, PNorm = 55.6453, GNorm = 0.6140, lr_0 = 8.8936e-04
Loss = 2.0535e-01, PNorm = 55.6777, GNorm = 1.7480, lr_0 = 8.8875e-04
Loss = 1.9393e-01, PNorm = 55.7098, GNorm = 1.6090, lr_0 = 8.8814e-04
Loss = 2.0189e-01, PNorm = 55.7309, GNorm = 1.5617, lr_0 = 8.8753e-04
Loss = 2.4221e-01, PNorm = 55.7413, GNorm = 1.6849, lr_0 = 8.8693e-04
Loss = 2.0825e-01, PNorm = 55.7683, GNorm = 1.2357, lr_0 = 8.8632e-04
Loss = 2.2392e-01, PNorm = 55.7972, GNorm = 1.7954, lr_0 = 8.8571e-04
Loss = 2.1752e-01, PNorm = 55.8303, GNorm = 1.2305, lr_0 = 8.8510e-04
Loss = 1.9964e-01, PNorm = 55.8542, GNorm = 1.1339, lr_0 = 8.8450e-04
Loss = 2.3639e-01, PNorm = 55.8849, GNorm = 1.7868, lr_0 = 8.8389e-04
Loss = 2.0602e-01, PNorm = 55.9166, GNorm = 0.7741, lr_0 = 8.8329e-04
Loss = 2.0025e-01, PNorm = 55.9404, GNorm = 1.0384, lr_0 = 8.8268e-04
Loss = 1.9877e-01, PNorm = 55.9680, GNorm = 2.9423, lr_0 = 8.8208e-04
Loss = 2.0194e-01, PNorm = 55.9991, GNorm = 0.9473, lr_0 = 8.8147e-04
Loss = 2.1588e-01, PNorm = 56.0282, GNorm = 1.0979, lr_0 = 8.8087e-04
Loss = 2.0588e-01, PNorm = 56.0626, GNorm = 1.2813, lr_0 = 8.8026e-04
Loss = 2.2382e-01, PNorm = 56.0879, GNorm = 0.9344, lr_0 = 8.7966e-04
Loss = 2.0735e-01, PNorm = 56.1173, GNorm = 0.9663, lr_0 = 8.7906e-04
Loss = 1.9047e-01, PNorm = 56.1447, GNorm = 1.9873, lr_0 = 8.7846e-04
Loss = 2.1739e-01, PNorm = 56.1750, GNorm = 1.2813, lr_0 = 8.7785e-04
Loss = 2.1891e-01, PNorm = 56.2042, GNorm = 1.3423, lr_0 = 8.7725e-04
Loss = 2.5847e-01, PNorm = 56.2329, GNorm = 2.2507, lr_0 = 8.7665e-04
Loss = 2.4748e-01, PNorm = 56.2803, GNorm = 2.1883, lr_0 = 8.7605e-04
Loss = 1.9384e-01, PNorm = 56.3177, GNorm = 1.1705, lr_0 = 8.7545e-04
Loss = 2.2934e-01, PNorm = 56.3404, GNorm = 0.6878, lr_0 = 8.7485e-04
Loss = 1.7150e-01, PNorm = 56.3668, GNorm = 0.5997, lr_0 = 8.7425e-04
Loss = 2.1463e-01, PNorm = 56.3953, GNorm = 1.2558, lr_0 = 8.7365e-04
Loss = 2.0838e-01, PNorm = 56.4189, GNorm = 1.5519, lr_0 = 8.7306e-04
Loss = 2.1949e-01, PNorm = 56.4416, GNorm = 1.9737, lr_0 = 8.7246e-04
Loss = 2.2334e-01, PNorm = 56.4689, GNorm = 2.0358, lr_0 = 8.7186e-04
Loss = 1.9624e-01, PNorm = 56.4878, GNorm = 1.5071, lr_0 = 8.7126e-04
Loss = 1.7396e-01, PNorm = 56.5161, GNorm = 1.4193, lr_0 = 8.7067e-04
Loss = 2.0311e-01, PNorm = 56.5369, GNorm = 1.0344, lr_0 = 8.7007e-04
Loss = 2.0985e-01, PNorm = 56.5592, GNorm = 0.9232, lr_0 = 8.6947e-04
Loss = 1.9813e-01, PNorm = 56.5775, GNorm = 1.1510, lr_0 = 8.6888e-04
Loss = 2.2028e-01, PNorm = 56.5953, GNorm = 1.5712, lr_0 = 8.6828e-04
Loss = 2.0530e-01, PNorm = 56.6128, GNorm = 2.2341, lr_0 = 8.6769e-04
Loss = 1.9720e-01, PNorm = 56.6378, GNorm = 1.9529, lr_0 = 8.6709e-04
Loss = 1.7854e-01, PNorm = 56.6559, GNorm = 0.9960, lr_0 = 8.6650e-04
Loss = 1.9738e-01, PNorm = 56.6785, GNorm = 1.8519, lr_0 = 8.6590e-04
Loss = 1.8756e-01, PNorm = 56.6988, GNorm = 1.3697, lr_0 = 8.6531e-04
Loss = 1.7940e-01, PNorm = 56.7171, GNorm = 0.6569, lr_0 = 8.6472e-04
Loss = 1.9876e-01, PNorm = 56.7371, GNorm = 1.3433, lr_0 = 8.6413e-04
Loss = 1.9468e-01, PNorm = 56.7649, GNorm = 2.4469, lr_0 = 8.6353e-04
Loss = 1.9720e-01, PNorm = 56.7936, GNorm = 1.0202, lr_0 = 8.6294e-04
Loss = 1.9094e-01, PNorm = 56.8207, GNorm = 1.1029, lr_0 = 8.6235e-04
Loss = 1.9918e-01, PNorm = 56.8447, GNorm = 1.2354, lr_0 = 8.6176e-04
Loss = 2.1127e-01, PNorm = 56.8648, GNorm = 1.2569, lr_0 = 8.6117e-04
Loss = 2.1140e-01, PNorm = 56.8887, GNorm = 2.0417, lr_0 = 8.6058e-04
Loss = 2.1700e-01, PNorm = 56.9127, GNorm = 2.4398, lr_0 = 8.5999e-04
Loss = 2.0682e-01, PNorm = 56.9473, GNorm = 0.7323, lr_0 = 8.5940e-04
Loss = 1.9698e-01, PNorm = 56.9660, GNorm = 0.5946, lr_0 = 8.5881e-04
Loss = 2.0206e-01, PNorm = 56.9966, GNorm = 1.6780, lr_0 = 8.5823e-04
Loss = 1.9705e-01, PNorm = 57.0205, GNorm = 0.8212, lr_0 = 8.5764e-04
Loss = 2.0943e-01, PNorm = 57.0510, GNorm = 0.9002, lr_0 = 8.5705e-04
Loss = 2.1037e-01, PNorm = 57.0796, GNorm = 1.0037, lr_0 = 8.5646e-04
Loss = 2.1282e-01, PNorm = 57.1011, GNorm = 1.2795, lr_0 = 8.5588e-04
Loss = 2.2693e-01, PNorm = 57.1254, GNorm = 0.7618, lr_0 = 8.5529e-04
Loss = 1.9490e-01, PNorm = 57.1462, GNorm = 1.4200, lr_0 = 8.5470e-04
Loss = 2.1205e-01, PNorm = 57.1747, GNorm = 1.1172, lr_0 = 8.5412e-04
Loss = 2.2485e-01, PNorm = 57.1987, GNorm = 1.9240, lr_0 = 8.5353e-04
Loss = 2.0946e-01, PNorm = 57.2350, GNorm = 1.3317, lr_0 = 8.5295e-04
Loss = 1.7979e-01, PNorm = 57.2629, GNorm = 0.8289, lr_0 = 8.5236e-04
Loss = 1.9704e-01, PNorm = 57.2831, GNorm = 0.8947, lr_0 = 8.5178e-04
Loss = 1.7142e-01, PNorm = 57.3064, GNorm = 0.9835, lr_0 = 8.5120e-04
Loss = 1.9982e-01, PNorm = 57.3252, GNorm = 0.8161, lr_0 = 8.5061e-04
Loss = 1.9573e-01, PNorm = 57.3447, GNorm = 0.9388, lr_0 = 8.5003e-04
Loss = 1.9070e-01, PNorm = 57.3638, GNorm = 1.2648, lr_0 = 8.4945e-04
Loss = 2.0555e-01, PNorm = 57.3807, GNorm = 1.2089, lr_0 = 8.4887e-04
Loss = 1.6438e-01, PNorm = 57.4094, GNorm = 0.8674, lr_0 = 8.4828e-04
Validation mae = 0.287521
Epoch 4
Loss = 1.8617e-01, PNorm = 57.4346, GNorm = 0.9857, lr_0 = 8.4770e-04
Loss = 2.0805e-01, PNorm = 57.4655, GNorm = 1.8280, lr_0 = 8.4712e-04
Loss = 1.8854e-01, PNorm = 57.4971, GNorm = 0.4984, lr_0 = 8.4654e-04
Loss = 1.9936e-01, PNorm = 57.5268, GNorm = 1.1695, lr_0 = 8.4596e-04
Loss = 2.1774e-01, PNorm = 57.5532, GNorm = 1.2693, lr_0 = 8.4538e-04
Loss = 1.9350e-01, PNorm = 57.5907, GNorm = 0.7963, lr_0 = 8.4480e-04
Loss = 2.0854e-01, PNorm = 57.6346, GNorm = 0.7602, lr_0 = 8.4423e-04
Loss = 2.1454e-01, PNorm = 57.6642, GNorm = 1.0386, lr_0 = 8.4365e-04
Loss = 2.1105e-01, PNorm = 57.7048, GNorm = 0.6884, lr_0 = 8.4307e-04
Loss = 1.7210e-01, PNorm = 57.7380, GNorm = 0.7260, lr_0 = 8.4249e-04
Loss = 1.9351e-01, PNorm = 57.7652, GNorm = 0.7694, lr_0 = 8.4191e-04
Loss = 1.8499e-01, PNorm = 57.7942, GNorm = 2.9491, lr_0 = 8.4134e-04
Loss = 1.9842e-01, PNorm = 57.8235, GNorm = 1.1447, lr_0 = 8.4076e-04
Loss = 1.7347e-01, PNorm = 57.8518, GNorm = 1.0854, lr_0 = 8.4019e-04
Loss = 1.9297e-01, PNorm = 57.8806, GNorm = 1.4077, lr_0 = 8.3961e-04
Loss = 1.8157e-01, PNorm = 57.8997, GNorm = 0.5107, lr_0 = 8.3903e-04
Loss = 1.8731e-01, PNorm = 57.9239, GNorm = 1.6648, lr_0 = 8.3846e-04
Loss = 1.6330e-01, PNorm = 57.9382, GNorm = 1.6107, lr_0 = 8.3789e-04
Loss = 1.7908e-01, PNorm = 57.9606, GNorm = 1.2682, lr_0 = 8.3731e-04
Loss = 1.7690e-01, PNorm = 57.9832, GNorm = 1.0019, lr_0 = 8.3674e-04
Loss = 1.8568e-01, PNorm = 58.0097, GNorm = 0.6104, lr_0 = 8.3616e-04
Loss = 1.7475e-01, PNorm = 58.0306, GNorm = 1.0669, lr_0 = 8.3559e-04
Loss = 1.6609e-01, PNorm = 58.0575, GNorm = 1.3997, lr_0 = 8.3502e-04
Loss = 1.9022e-01, PNorm = 58.0779, GNorm = 1.2272, lr_0 = 8.3445e-04
Loss = 1.8361e-01, PNorm = 58.1058, GNorm = 1.2049, lr_0 = 8.3388e-04
Loss = 1.9039e-01, PNorm = 58.1288, GNorm = 0.6173, lr_0 = 8.3330e-04
Loss = 1.8344e-01, PNorm = 58.1627, GNorm = 0.8427, lr_0 = 8.3273e-04
Loss = 1.7659e-01, PNorm = 58.1835, GNorm = 0.7611, lr_0 = 8.3216e-04
Loss = 1.8773e-01, PNorm = 58.2053, GNorm = 0.9295, lr_0 = 8.3159e-04
Loss = 1.8963e-01, PNorm = 58.2279, GNorm = 1.7935, lr_0 = 8.3102e-04
Loss = 1.8487e-01, PNorm = 58.2560, GNorm = 0.6634, lr_0 = 8.3045e-04
Loss = 1.9292e-01, PNorm = 58.2768, GNorm = 1.3252, lr_0 = 8.2988e-04
Loss = 1.7283e-01, PNorm = 58.2987, GNorm = 0.6901, lr_0 = 8.2932e-04
Loss = 1.8316e-01, PNorm = 58.3156, GNorm = 1.4381, lr_0 = 8.2875e-04
Loss = 1.9279e-01, PNorm = 58.3484, GNorm = 1.7983, lr_0 = 8.2818e-04
Loss = 1.9546e-01, PNorm = 58.3779, GNorm = 1.3960, lr_0 = 8.2761e-04
Loss = 2.0808e-01, PNorm = 58.4079, GNorm = 1.5305, lr_0 = 8.2705e-04
Loss = 1.8569e-01, PNorm = 58.4372, GNorm = 1.3694, lr_0 = 8.2648e-04
Loss = 1.8064e-01, PNorm = 58.4637, GNorm = 0.6694, lr_0 = 8.2591e-04
Loss = 1.9893e-01, PNorm = 58.4884, GNorm = 1.7430, lr_0 = 8.2535e-04
Loss = 1.9378e-01, PNorm = 58.5110, GNorm = 0.9956, lr_0 = 8.2478e-04
Loss = 1.8456e-01, PNorm = 58.5285, GNorm = 0.6609, lr_0 = 8.2422e-04
Loss = 1.9563e-01, PNorm = 58.5573, GNorm = 0.7013, lr_0 = 8.2365e-04
Loss = 2.0063e-01, PNorm = 58.5874, GNorm = 0.8449, lr_0 = 8.2309e-04
Loss = 1.8474e-01, PNorm = 58.6116, GNorm = 0.5684, lr_0 = 8.2252e-04
Loss = 1.9150e-01, PNorm = 58.6414, GNorm = 1.3332, lr_0 = 8.2196e-04
Loss = 1.7389e-01, PNorm = 58.6716, GNorm = 0.9742, lr_0 = 8.2140e-04
Loss = 1.8285e-01, PNorm = 58.6965, GNorm = 0.7413, lr_0 = 8.2084e-04
Loss = 1.9836e-01, PNorm = 58.7124, GNorm = 2.0995, lr_0 = 8.2027e-04
Loss = 2.0245e-01, PNorm = 58.7384, GNorm = 2.8396, lr_0 = 8.1971e-04
Loss = 2.0648e-01, PNorm = 58.7756, GNorm = 1.6390, lr_0 = 8.1915e-04
Loss = 2.0477e-01, PNorm = 58.8006, GNorm = 0.6968, lr_0 = 8.1859e-04
Loss = 2.0387e-01, PNorm = 58.8283, GNorm = 1.5437, lr_0 = 8.1803e-04
Loss = 1.7210e-01, PNorm = 58.8494, GNorm = 0.8472, lr_0 = 8.1747e-04
Loss = 1.7125e-01, PNorm = 58.8809, GNorm = 1.0210, lr_0 = 8.1691e-04
Loss = 1.8814e-01, PNorm = 58.9028, GNorm = 1.4796, lr_0 = 8.1635e-04
Loss = 1.8637e-01, PNorm = 58.9345, GNorm = 0.6720, lr_0 = 8.1579e-04
Loss = 1.9785e-01, PNorm = 58.9562, GNorm = 0.8879, lr_0 = 8.1523e-04
Loss = 1.7159e-01, PNorm = 58.9878, GNorm = 1.2658, lr_0 = 8.1467e-04
Loss = 1.9973e-01, PNorm = 59.0120, GNorm = 1.6885, lr_0 = 8.1411e-04
Loss = 1.7705e-01, PNorm = 59.0360, GNorm = 1.2836, lr_0 = 8.1355e-04
Loss = 2.0571e-01, PNorm = 59.0582, GNorm = 0.5601, lr_0 = 8.1300e-04
Loss = 1.7709e-01, PNorm = 59.0821, GNorm = 0.8727, lr_0 = 8.1244e-04
Loss = 1.9075e-01, PNorm = 59.1105, GNorm = 0.6848, lr_0 = 8.1188e-04
Loss = 2.0418e-01, PNorm = 59.1423, GNorm = 0.6224, lr_0 = 8.1133e-04
Loss = 1.9061e-01, PNorm = 59.1738, GNorm = 0.9613, lr_0 = 8.1077e-04
Loss = 1.7906e-01, PNorm = 59.1994, GNorm = 1.8924, lr_0 = 8.1022e-04
Loss = 1.8835e-01, PNorm = 59.2191, GNorm = 0.7053, lr_0 = 8.0966e-04
Loss = 1.7098e-01, PNorm = 59.2453, GNorm = 0.6272, lr_0 = 8.0911e-04
Loss = 1.8795e-01, PNorm = 59.2687, GNorm = 2.6125, lr_0 = 8.0855e-04
Loss = 1.8865e-01, PNorm = 59.2949, GNorm = 0.7019, lr_0 = 8.0800e-04
Loss = 1.9378e-01, PNorm = 59.3226, GNorm = 1.8407, lr_0 = 8.0745e-04
Loss = 2.0145e-01, PNorm = 59.3464, GNorm = 1.2286, lr_0 = 8.0689e-04
Loss = 1.8232e-01, PNorm = 59.3674, GNorm = 0.6110, lr_0 = 8.0634e-04
Loss = 1.7511e-01, PNorm = 59.3915, GNorm = 1.7411, lr_0 = 8.0579e-04
Loss = 2.0128e-01, PNorm = 59.4123, GNorm = 1.0996, lr_0 = 8.0523e-04
Loss = 2.2041e-01, PNorm = 59.4458, GNorm = 2.5072, lr_0 = 8.0468e-04
Loss = 2.0552e-01, PNorm = 59.4720, GNorm = 0.8468, lr_0 = 8.0413e-04
Loss = 1.8944e-01, PNorm = 59.5020, GNorm = 0.8492, lr_0 = 8.0358e-04
Loss = 1.9495e-01, PNorm = 59.5300, GNorm = 0.9271, lr_0 = 8.0303e-04
Loss = 1.9397e-01, PNorm = 59.5593, GNorm = 1.3939, lr_0 = 8.0248e-04
Loss = 1.6925e-01, PNorm = 59.5815, GNorm = 1.2684, lr_0 = 8.0193e-04
Loss = 1.8659e-01, PNorm = 59.6050, GNorm = 0.6906, lr_0 = 8.0138e-04
Loss = 1.9143e-01, PNorm = 59.6279, GNorm = 0.9394, lr_0 = 8.0083e-04
Loss = 2.0205e-01, PNorm = 59.6492, GNorm = 0.7868, lr_0 = 8.0028e-04
Loss = 1.7353e-01, PNorm = 59.6735, GNorm = 1.1720, lr_0 = 7.9974e-04
Loss = 2.3115e-01, PNorm = 59.7062, GNorm = 1.4026, lr_0 = 7.9919e-04
Loss = 2.0504e-01, PNorm = 59.7403, GNorm = 2.0772, lr_0 = 7.9864e-04
Loss = 2.0053e-01, PNorm = 59.7672, GNorm = 1.0757, lr_0 = 7.9809e-04
Loss = 1.7128e-01, PNorm = 59.8003, GNorm = 1.3262, lr_0 = 7.9755e-04
Loss = 2.0303e-01, PNorm = 59.8263, GNorm = 1.0949, lr_0 = 7.9700e-04
Loss = 2.2341e-01, PNorm = 59.8567, GNorm = 1.7769, lr_0 = 7.9645e-04
Loss = 1.8371e-01, PNorm = 59.8905, GNorm = 0.7005, lr_0 = 7.9591e-04
Loss = 1.9598e-01, PNorm = 59.9231, GNorm = 1.4159, lr_0 = 7.9536e-04
Loss = 1.7286e-01, PNorm = 59.9547, GNorm = 0.7112, lr_0 = 7.9482e-04
Loss = 1.9870e-01, PNorm = 59.9810, GNorm = 0.9414, lr_0 = 7.9427e-04
Loss = 1.9561e-01, PNorm = 60.0089, GNorm = 0.9518, lr_0 = 7.9373e-04
Loss = 1.6337e-01, PNorm = 60.0325, GNorm = 0.9846, lr_0 = 7.9319e-04
Loss = 1.8459e-01, PNorm = 60.0499, GNorm = 0.7920, lr_0 = 7.9264e-04
Loss = 1.5967e-01, PNorm = 60.0711, GNorm = 1.1625, lr_0 = 7.9210e-04
Loss = 2.0012e-01, PNorm = 60.0914, GNorm = 0.8996, lr_0 = 7.9156e-04
Loss = 2.0438e-01, PNorm = 60.1178, GNorm = 0.9454, lr_0 = 7.9101e-04
Loss = 1.9801e-01, PNorm = 60.1401, GNorm = 0.9908, lr_0 = 7.9047e-04
Loss = 1.7365e-01, PNorm = 60.1613, GNorm = 1.0424, lr_0 = 7.8993e-04
Loss = 1.8446e-01, PNorm = 60.1804, GNorm = 2.0516, lr_0 = 7.8939e-04
Loss = 1.8679e-01, PNorm = 60.2079, GNorm = 1.2884, lr_0 = 7.8885e-04
Loss = 1.9750e-01, PNorm = 60.2326, GNorm = 1.1466, lr_0 = 7.8831e-04
Loss = 1.9414e-01, PNorm = 60.2593, GNorm = 1.1802, lr_0 = 7.8777e-04
Loss = 1.6783e-01, PNorm = 60.2800, GNorm = 0.6256, lr_0 = 7.8723e-04
Loss = 1.7976e-01, PNorm = 60.3020, GNorm = 1.4747, lr_0 = 7.8669e-04
Loss = 1.7669e-01, PNorm = 60.3243, GNorm = 1.1268, lr_0 = 7.8615e-04
Loss = 1.8809e-01, PNorm = 60.3549, GNorm = 2.5155, lr_0 = 7.8561e-04
Loss = 1.9022e-01, PNorm = 60.3803, GNorm = 0.8407, lr_0 = 7.8507e-04
Loss = 1.8251e-01, PNorm = 60.4087, GNorm = 0.7342, lr_0 = 7.8454e-04
Loss = 2.0911e-01, PNorm = 60.4354, GNorm = 1.1133, lr_0 = 7.8400e-04
Loss = 1.9757e-01, PNorm = 60.4678, GNorm = 1.4607, lr_0 = 7.8346e-04
Loss = 1.6851e-01, PNorm = 60.4935, GNorm = 0.7133, lr_0 = 7.8293e-04
Loss = 1.7782e-01, PNorm = 60.5156, GNorm = 0.6809, lr_0 = 7.8239e-04
Loss = 1.9541e-01, PNorm = 60.5456, GNorm = 0.7347, lr_0 = 7.8185e-04
Loss = 1.8634e-01, PNorm = 60.5711, GNorm = 0.5769, lr_0 = 7.8132e-04
Validation mae = 0.252857
Epoch 5
Loss = 1.7192e-01, PNorm = 60.5927, GNorm = 0.7875, lr_0 = 7.8078e-04
Loss = 1.5778e-01, PNorm = 60.6160, GNorm = 0.9283, lr_0 = 7.8025e-04
Loss = 1.8526e-01, PNorm = 60.6403, GNorm = 1.0584, lr_0 = 7.7971e-04
Loss = 1.8853e-01, PNorm = 60.6728, GNorm = 1.9548, lr_0 = 7.7918e-04
Loss = 2.0552e-01, PNorm = 60.7146, GNorm = 1.9076, lr_0 = 7.7864e-04
Loss = 1.7450e-01, PNorm = 60.7616, GNorm = 1.1056, lr_0 = 7.7811e-04
Loss = 1.7322e-01, PNorm = 60.7950, GNorm = 0.8943, lr_0 = 7.7758e-04
Loss = 1.5061e-01, PNorm = 60.8212, GNorm = 1.6279, lr_0 = 7.7705e-04
Loss = 1.6752e-01, PNorm = 60.8511, GNorm = 0.9554, lr_0 = 7.7651e-04
Loss = 2.0486e-01, PNorm = 60.8781, GNorm = 1.2603, lr_0 = 7.7598e-04
Loss = 1.8603e-01, PNorm = 60.9095, GNorm = 1.4036, lr_0 = 7.7545e-04
Loss = 1.6913e-01, PNorm = 60.9253, GNorm = 0.5086, lr_0 = 7.7492e-04
Loss = 1.7995e-01, PNorm = 60.9505, GNorm = 0.7273, lr_0 = 7.7439e-04
Loss = 1.7813e-01, PNorm = 60.9738, GNorm = 1.3185, lr_0 = 7.7386e-04
Loss = 1.8687e-01, PNorm = 61.0035, GNorm = 0.5759, lr_0 = 7.7333e-04
Loss = 1.8749e-01, PNorm = 61.0226, GNorm = 0.9149, lr_0 = 7.7280e-04
Loss = 1.9254e-01, PNorm = 61.0508, GNorm = 1.2494, lr_0 = 7.7227e-04
Loss = 1.8015e-01, PNorm = 61.0717, GNorm = 0.5647, lr_0 = 7.7174e-04
Loss = 1.8455e-01, PNorm = 61.1050, GNorm = 0.6536, lr_0 = 7.7121e-04
Loss = 1.9715e-01, PNorm = 61.1252, GNorm = 1.8525, lr_0 = 7.7068e-04
Loss = 1.9312e-01, PNorm = 61.1549, GNorm = 2.8194, lr_0 = 7.7015e-04
Loss = 1.7461e-01, PNorm = 61.1873, GNorm = 0.7892, lr_0 = 7.6963e-04
Loss = 1.8551e-01, PNorm = 61.2082, GNorm = 0.8152, lr_0 = 7.6910e-04
Loss = 1.7193e-01, PNorm = 61.2289, GNorm = 0.6436, lr_0 = 7.6857e-04
Loss = 1.6348e-01, PNorm = 61.2510, GNorm = 0.5748, lr_0 = 7.6805e-04
Loss = 1.6599e-01, PNorm = 61.2687, GNorm = 0.6683, lr_0 = 7.6752e-04
Loss = 1.8831e-01, PNorm = 61.2878, GNorm = 1.2328, lr_0 = 7.6699e-04
Loss = 1.9055e-01, PNorm = 61.3108, GNorm = 1.0575, lr_0 = 7.6647e-04
Loss = 1.7586e-01, PNorm = 61.3438, GNorm = 0.5163, lr_0 = 7.6594e-04
Loss = 1.9651e-01, PNorm = 61.3716, GNorm = 1.9507, lr_0 = 7.6542e-04
Loss = 1.7663e-01, PNorm = 61.4064, GNorm = 1.8804, lr_0 = 7.6489e-04
Loss = 1.6575e-01, PNorm = 61.4277, GNorm = 1.2570, lr_0 = 7.6437e-04
Loss = 1.6870e-01, PNorm = 61.4485, GNorm = 1.0116, lr_0 = 7.6385e-04
Loss = 1.5476e-01, PNorm = 61.4668, GNorm = 0.8362, lr_0 = 7.6332e-04
Loss = 1.7675e-01, PNorm = 61.4858, GNorm = 0.8576, lr_0 = 7.6280e-04
Loss = 1.6910e-01, PNorm = 61.5097, GNorm = 0.8420, lr_0 = 7.6228e-04
Loss = 1.6891e-01, PNorm = 61.5376, GNorm = 1.0929, lr_0 = 7.6176e-04
Loss = 1.6042e-01, PNorm = 61.5604, GNorm = 0.7156, lr_0 = 7.6123e-04
Loss = 1.8016e-01, PNorm = 61.5799, GNorm = 0.6173, lr_0 = 7.6071e-04
Loss = 1.6906e-01, PNorm = 61.5956, GNorm = 0.6881, lr_0 = 7.6019e-04
Loss = 1.8658e-01, PNorm = 61.6209, GNorm = 1.8335, lr_0 = 7.5967e-04
Loss = 1.7783e-01, PNorm = 61.6502, GNorm = 1.0545, lr_0 = 7.5915e-04
Loss = 1.8889e-01, PNorm = 61.6745, GNorm = 0.7657, lr_0 = 7.5863e-04
Loss = 1.7881e-01, PNorm = 61.7026, GNorm = 1.3172, lr_0 = 7.5811e-04
Loss = 1.8501e-01, PNorm = 61.7300, GNorm = 0.7852, lr_0 = 7.5759e-04
Loss = 2.0174e-01, PNorm = 61.7612, GNorm = 0.9196, lr_0 = 7.5707e-04
Loss = 1.7523e-01, PNorm = 61.7845, GNorm = 0.7412, lr_0 = 7.5655e-04
Loss = 2.0427e-01, PNorm = 61.7941, GNorm = 0.7199, lr_0 = 7.5603e-04
Loss = 1.5850e-01, PNorm = 61.8181, GNorm = 1.3393, lr_0 = 7.5552e-04
Loss = 1.6199e-01, PNorm = 61.8413, GNorm = 0.7054, lr_0 = 7.5500e-04
Loss = 1.6455e-01, PNorm = 61.8643, GNorm = 1.0683, lr_0 = 7.5448e-04
Loss = 1.6956e-01, PNorm = 61.8855, GNorm = 0.7044, lr_0 = 7.5397e-04
Loss = 1.7168e-01, PNorm = 61.9097, GNorm = 0.8948, lr_0 = 7.5345e-04
Loss = 1.9485e-01, PNorm = 61.9321, GNorm = 1.6440, lr_0 = 7.5293e-04
Loss = 1.9535e-01, PNorm = 61.9627, GNorm = 1.3603, lr_0 = 7.5242e-04
Loss = 1.9930e-01, PNorm = 61.9943, GNorm = 0.5191, lr_0 = 7.5190e-04
Loss = 1.6781e-01, PNorm = 62.0216, GNorm = 1.0410, lr_0 = 7.5139e-04
Loss = 1.7225e-01, PNorm = 62.0463, GNorm = 0.6450, lr_0 = 7.5087e-04
Loss = 1.6930e-01, PNorm = 62.0723, GNorm = 0.5662, lr_0 = 7.5036e-04
Loss = 1.7288e-01, PNorm = 62.1053, GNorm = 0.7851, lr_0 = 7.4984e-04
Loss = 1.6222e-01, PNorm = 62.1269, GNorm = 1.2700, lr_0 = 7.4933e-04
Loss = 1.7308e-01, PNorm = 62.1495, GNorm = 0.5348, lr_0 = 7.4882e-04
Loss = 1.7267e-01, PNorm = 62.1698, GNorm = 0.5855, lr_0 = 7.4830e-04
Loss = 1.7715e-01, PNorm = 62.1934, GNorm = 0.9690, lr_0 = 7.4779e-04
Loss = 1.7001e-01, PNorm = 62.2167, GNorm = 0.9613, lr_0 = 7.4728e-04
Loss = 1.7497e-01, PNorm = 62.2365, GNorm = 0.8090, lr_0 = 7.4677e-04
Loss = 1.8280e-01, PNorm = 62.2541, GNorm = 0.9764, lr_0 = 7.4625e-04
Loss = 2.1154e-01, PNorm = 62.2818, GNorm = 1.1273, lr_0 = 7.4574e-04
Loss = 1.7359e-01, PNorm = 62.3116, GNorm = 0.8982, lr_0 = 7.4523e-04
Loss = 1.6403e-01, PNorm = 62.3276, GNorm = 0.7662, lr_0 = 7.4472e-04
Loss = 1.7159e-01, PNorm = 62.3488, GNorm = 1.2279, lr_0 = 7.4421e-04
Loss = 1.6621e-01, PNorm = 62.3659, GNorm = 1.9380, lr_0 = 7.4370e-04
Loss = 1.6771e-01, PNorm = 62.3942, GNorm = 0.5903, lr_0 = 7.4319e-04
Loss = 1.5814e-01, PNorm = 62.4078, GNorm = 0.8232, lr_0 = 7.4268e-04
Loss = 1.5899e-01, PNorm = 62.4295, GNorm = 0.9638, lr_0 = 7.4217e-04
Loss = 1.8105e-01, PNorm = 62.4546, GNorm = 1.5685, lr_0 = 7.4167e-04
Loss = 1.8150e-01, PNorm = 62.4835, GNorm = 0.7854, lr_0 = 7.4116e-04
Loss = 1.9027e-01, PNorm = 62.5075, GNorm = 1.4742, lr_0 = 7.4065e-04
Loss = 1.7340e-01, PNorm = 62.5355, GNorm = 0.8002, lr_0 = 7.4014e-04
Loss = 1.8268e-01, PNorm = 62.5584, GNorm = 0.7595, lr_0 = 7.3964e-04
Loss = 1.8337e-01, PNorm = 62.5745, GNorm = 1.3092, lr_0 = 7.3913e-04
Loss = 1.7042e-01, PNorm = 62.5985, GNorm = 0.9231, lr_0 = 7.3862e-04
Loss = 1.7215e-01, PNorm = 62.6239, GNorm = 1.2120, lr_0 = 7.3812e-04
Loss = 1.7715e-01, PNorm = 62.6459, GNorm = 0.5248, lr_0 = 7.3761e-04
Loss = 1.7037e-01, PNorm = 62.6697, GNorm = 0.6570, lr_0 = 7.3711e-04
Loss = 1.6043e-01, PNorm = 62.6875, GNorm = 0.8356, lr_0 = 7.3660e-04
Loss = 1.6253e-01, PNorm = 62.7075, GNorm = 0.7012, lr_0 = 7.3610e-04
Loss = 1.7697e-01, PNorm = 62.7295, GNorm = 0.9680, lr_0 = 7.3559e-04
Loss = 1.8630e-01, PNorm = 62.7466, GNorm = 1.5392, lr_0 = 7.3509e-04
Loss = 1.9483e-01, PNorm = 62.7624, GNorm = 1.0822, lr_0 = 7.3458e-04
Loss = 1.8907e-01, PNorm = 62.7790, GNorm = 0.9944, lr_0 = 7.3408e-04
Loss = 1.6611e-01, PNorm = 62.8021, GNorm = 0.9477, lr_0 = 7.3358e-04
Loss = 1.8398e-01, PNorm = 62.8295, GNorm = 2.7742, lr_0 = 7.3308e-04
Loss = 2.0693e-01, PNorm = 62.8536, GNorm = 0.8061, lr_0 = 7.3257e-04
Loss = 1.7763e-01, PNorm = 62.8842, GNorm = 1.2165, lr_0 = 7.3207e-04
Loss = 1.6984e-01, PNorm = 62.9051, GNorm = 0.9389, lr_0 = 7.3157e-04
Loss = 1.6961e-01, PNorm = 62.9284, GNorm = 1.0545, lr_0 = 7.3107e-04
Loss = 1.8530e-01, PNorm = 62.9439, GNorm = 1.1453, lr_0 = 7.3057e-04
Loss = 1.7812e-01, PNorm = 62.9590, GNorm = 0.6604, lr_0 = 7.3007e-04
Loss = 1.6447e-01, PNorm = 62.9909, GNorm = 0.8064, lr_0 = 7.2957e-04
Loss = 1.6137e-01, PNorm = 63.0151, GNorm = 0.8323, lr_0 = 7.2907e-04
Loss = 1.9124e-01, PNorm = 63.0450, GNorm = 0.8392, lr_0 = 7.2857e-04
Loss = 1.7108e-01, PNorm = 63.0680, GNorm = 1.1353, lr_0 = 7.2807e-04
Loss = 2.0244e-01, PNorm = 63.0834, GNorm = 1.6012, lr_0 = 7.2757e-04
Loss = 1.8327e-01, PNorm = 63.1126, GNorm = 0.8440, lr_0 = 7.2707e-04
Loss = 1.7753e-01, PNorm = 63.1411, GNorm = 1.4163, lr_0 = 7.2657e-04
Loss = 1.7176e-01, PNorm = 63.1688, GNorm = 1.5570, lr_0 = 7.2608e-04
Loss = 1.7587e-01, PNorm = 63.1987, GNorm = 0.8341, lr_0 = 7.2558e-04
Loss = 1.8273e-01, PNorm = 63.2335, GNorm = 1.1953, lr_0 = 7.2508e-04
Loss = 1.9907e-01, PNorm = 63.2625, GNorm = 1.5908, lr_0 = 7.2458e-04
Loss = 1.8764e-01, PNorm = 63.2858, GNorm = 0.6248, lr_0 = 7.2409e-04
Loss = 1.8457e-01, PNorm = 63.3075, GNorm = 0.7799, lr_0 = 7.2359e-04
Loss = 1.8792e-01, PNorm = 63.3196, GNorm = 1.1486, lr_0 = 7.2310e-04
Loss = 1.7732e-01, PNorm = 63.3441, GNorm = 0.7406, lr_0 = 7.2260e-04
Loss = 1.7263e-01, PNorm = 63.3623, GNorm = 1.3984, lr_0 = 7.2211e-04
Loss = 1.7393e-01, PNorm = 63.3874, GNorm = 1.0433, lr_0 = 7.2161e-04
Loss = 1.6488e-01, PNorm = 63.4105, GNorm = 0.6988, lr_0 = 7.2112e-04
Loss = 1.6990e-01, PNorm = 63.4341, GNorm = 0.6456, lr_0 = 7.2062e-04
Loss = 1.7787e-01, PNorm = 63.4576, GNorm = 1.2353, lr_0 = 7.2013e-04
Loss = 1.6948e-01, PNorm = 63.4885, GNorm = 0.8909, lr_0 = 7.1964e-04
Validation mae = 0.251184
Epoch 6
Loss = 1.6707e-01, PNorm = 63.5095, GNorm = 0.5852, lr_0 = 7.1914e-04
Loss = 1.6029e-01, PNorm = 63.5347, GNorm = 1.3865, lr_0 = 7.1865e-04
Loss = 1.7442e-01, PNorm = 63.5568, GNorm = 1.1372, lr_0 = 7.1816e-04
Loss = 1.7239e-01, PNorm = 63.5796, GNorm = 0.9948, lr_0 = 7.1767e-04
Loss = 1.6043e-01, PNorm = 63.5956, GNorm = 1.5934, lr_0 = 7.1717e-04
Loss = 1.6292e-01, PNorm = 63.6168, GNorm = 1.1847, lr_0 = 7.1668e-04
Loss = 1.4665e-01, PNorm = 63.6428, GNorm = 1.4278, lr_0 = 7.1619e-04
Loss = 1.7263e-01, PNorm = 63.6671, GNorm = 0.9985, lr_0 = 7.1570e-04
Loss = 1.7141e-01, PNorm = 63.6884, GNorm = 0.9567, lr_0 = 7.1521e-04
Loss = 1.5525e-01, PNorm = 63.7094, GNorm = 0.6348, lr_0 = 7.1472e-04
Loss = 1.7375e-01, PNorm = 63.7211, GNorm = 0.7038, lr_0 = 7.1423e-04
Loss = 1.6933e-01, PNorm = 63.7411, GNorm = 0.9336, lr_0 = 7.1374e-04
Loss = 1.7847e-01, PNorm = 63.7607, GNorm = 1.2417, lr_0 = 7.1325e-04
Loss = 1.7424e-01, PNorm = 63.7941, GNorm = 0.9676, lr_0 = 7.1277e-04
Loss = 1.5564e-01, PNorm = 63.8173, GNorm = 0.5764, lr_0 = 7.1228e-04
Loss = 1.7202e-01, PNorm = 63.8446, GNorm = 1.1832, lr_0 = 7.1179e-04
Loss = 1.7174e-01, PNorm = 63.8729, GNorm = 0.7169, lr_0 = 7.1130e-04
Loss = 1.5823e-01, PNorm = 63.8952, GNorm = 0.9980, lr_0 = 7.1081e-04
Loss = 1.4600e-01, PNorm = 63.9170, GNorm = 1.1809, lr_0 = 7.1033e-04
Loss = 1.7906e-01, PNorm = 63.9337, GNorm = 1.8779, lr_0 = 7.0984e-04
Loss = 2.1030e-01, PNorm = 63.9556, GNorm = 1.1959, lr_0 = 7.0935e-04
Loss = 1.5629e-01, PNorm = 63.9847, GNorm = 0.6786, lr_0 = 7.0887e-04
Loss = 1.7580e-01, PNorm = 64.0095, GNorm = 1.0940, lr_0 = 7.0838e-04
Loss = 1.9646e-01, PNorm = 64.0328, GNorm = 1.6085, lr_0 = 7.0790e-04
Loss = 1.7835e-01, PNorm = 64.0670, GNorm = 1.6104, lr_0 = 7.0741e-04
Loss = 1.5175e-01, PNorm = 64.0885, GNorm = 0.7028, lr_0 = 7.0693e-04
Loss = 1.7879e-01, PNorm = 64.1086, GNorm = 1.2762, lr_0 = 7.0644e-04
Loss = 1.6833e-01, PNorm = 64.1264, GNorm = 0.5696, lr_0 = 7.0596e-04
Loss = 1.6188e-01, PNorm = 64.1451, GNorm = 1.0180, lr_0 = 7.0548e-04
Loss = 1.6285e-01, PNorm = 64.1644, GNorm = 0.6307, lr_0 = 7.0499e-04
Loss = 1.6747e-01, PNorm = 64.1809, GNorm = 0.6534, lr_0 = 7.0451e-04
Loss = 1.7745e-01, PNorm = 64.2002, GNorm = 0.9904, lr_0 = 7.0403e-04
Loss = 1.5062e-01, PNorm = 64.2289, GNorm = 0.7007, lr_0 = 7.0354e-04
Loss = 1.7386e-01, PNorm = 64.2484, GNorm = 1.5683, lr_0 = 7.0306e-04
Loss = 1.6907e-01, PNorm = 64.2710, GNorm = 0.4381, lr_0 = 7.0258e-04
Loss = 1.9787e-01, PNorm = 64.3021, GNorm = 1.0284, lr_0 = 7.0210e-04
Loss = 1.8687e-01, PNorm = 64.3266, GNorm = 1.5666, lr_0 = 7.0162e-04
Loss = 1.8031e-01, PNorm = 64.3572, GNorm = 1.3090, lr_0 = 7.0114e-04
Loss = 1.8650e-01, PNorm = 64.3942, GNorm = 1.8559, lr_0 = 7.0066e-04
Loss = 1.8047e-01, PNorm = 64.4269, GNorm = 1.4654, lr_0 = 7.0018e-04
Loss = 1.6536e-01, PNorm = 64.4570, GNorm = 0.9628, lr_0 = 6.9970e-04
Loss = 1.7507e-01, PNorm = 64.4891, GNorm = 0.8971, lr_0 = 6.9922e-04
Loss = 1.9341e-01, PNorm = 64.5171, GNorm = 0.8455, lr_0 = 6.9874e-04
Loss = 1.6896e-01, PNorm = 64.5473, GNorm = 1.9724, lr_0 = 6.9826e-04
Loss = 1.6433e-01, PNorm = 64.5689, GNorm = 1.5603, lr_0 = 6.9778e-04
Loss = 1.6143e-01, PNorm = 64.5985, GNorm = 1.3004, lr_0 = 6.9730e-04
Loss = 1.6696e-01, PNorm = 64.6250, GNorm = 0.6365, lr_0 = 6.9683e-04
Loss = 1.7769e-01, PNorm = 64.6534, GNorm = 0.9129, lr_0 = 6.9635e-04
Loss = 1.6247e-01, PNorm = 64.6771, GNorm = 0.9237, lr_0 = 6.9587e-04
Loss = 1.6951e-01, PNorm = 64.6938, GNorm = 0.7484, lr_0 = 6.9540e-04
Loss = 1.4459e-01, PNorm = 64.7142, GNorm = 0.9380, lr_0 = 6.9492e-04
Loss = 1.5012e-01, PNorm = 64.7292, GNorm = 1.3029, lr_0 = 6.9444e-04
Loss = 1.5790e-01, PNorm = 64.7513, GNorm = 0.7005, lr_0 = 6.9397e-04
Loss = 1.4158e-01, PNorm = 64.7698, GNorm = 0.6173, lr_0 = 6.9349e-04
Loss = 1.3356e-01, PNorm = 64.7879, GNorm = 0.4854, lr_0 = 6.9302e-04
Loss = 1.5699e-01, PNorm = 64.8107, GNorm = 0.8776, lr_0 = 6.9254e-04
Loss = 1.7459e-01, PNorm = 64.8302, GNorm = 0.8422, lr_0 = 6.9207e-04
Loss = 1.6609e-01, PNorm = 64.8492, GNorm = 0.7730, lr_0 = 6.9159e-04
Loss = 1.4676e-01, PNorm = 64.8672, GNorm = 0.7225, lr_0 = 6.9112e-04
Loss = 1.7324e-01, PNorm = 64.8818, GNorm = 0.7059, lr_0 = 6.9065e-04
Loss = 1.7655e-01, PNorm = 64.9047, GNorm = 0.7362, lr_0 = 6.9017e-04
Loss = 1.5530e-01, PNorm = 64.9246, GNorm = 0.6644, lr_0 = 6.8970e-04
Loss = 1.5377e-01, PNorm = 64.9452, GNorm = 1.1127, lr_0 = 6.8923e-04
Loss = 1.7853e-01, PNorm = 64.9648, GNorm = 1.1403, lr_0 = 6.8876e-04
Loss = 2.1340e-01, PNorm = 64.9850, GNorm = 1.4315, lr_0 = 6.8828e-04
Loss = 1.7498e-01, PNorm = 65.0071, GNorm = 0.9871, lr_0 = 6.8781e-04
Loss = 1.5944e-01, PNorm = 65.0314, GNorm = 1.2299, lr_0 = 6.8734e-04
Loss = 1.6525e-01, PNorm = 65.0554, GNorm = 0.5160, lr_0 = 6.8687e-04
Loss = 1.6609e-01, PNorm = 65.0811, GNorm = 0.9093, lr_0 = 6.8640e-04
Loss = 1.6108e-01, PNorm = 65.1037, GNorm = 0.6756, lr_0 = 6.8593e-04
Loss = 1.3673e-01, PNorm = 65.1254, GNorm = 0.6483, lr_0 = 6.8546e-04
Loss = 1.4728e-01, PNorm = 65.1430, GNorm = 0.5492, lr_0 = 6.8499e-04
Loss = 1.5529e-01, PNorm = 65.1582, GNorm = 2.2249, lr_0 = 6.8452e-04
Loss = 1.6822e-01, PNorm = 65.1691, GNorm = 0.9395, lr_0 = 6.8405e-04
Loss = 1.4718e-01, PNorm = 65.1879, GNorm = 0.7422, lr_0 = 6.8358e-04
Loss = 1.6878e-01, PNorm = 65.2073, GNorm = 1.5968, lr_0 = 6.8312e-04
Loss = 1.5340e-01, PNorm = 65.2305, GNorm = 0.8065, lr_0 = 6.8265e-04
Loss = 1.6715e-01, PNorm = 65.2506, GNorm = 0.7258, lr_0 = 6.8218e-04
Loss = 1.7737e-01, PNorm = 65.2742, GNorm = 0.5645, lr_0 = 6.8171e-04
Loss = 1.9443e-01, PNorm = 65.3004, GNorm = 1.6210, lr_0 = 6.8125e-04
Loss = 1.9541e-01, PNorm = 65.3277, GNorm = 1.0026, lr_0 = 6.8078e-04
Loss = 1.7527e-01, PNorm = 65.3500, GNorm = 0.9667, lr_0 = 6.8031e-04
Loss = 1.7791e-01, PNorm = 65.3698, GNorm = 0.7671, lr_0 = 6.7985e-04
Loss = 1.6021e-01, PNorm = 65.3944, GNorm = 0.4856, lr_0 = 6.7938e-04
Loss = 1.5859e-01, PNorm = 65.4112, GNorm = 0.7518, lr_0 = 6.7892e-04
Loss = 1.6854e-01, PNorm = 65.4329, GNorm = 0.8469, lr_0 = 6.7845e-04
Loss = 1.5651e-01, PNorm = 65.4547, GNorm = 0.6602, lr_0 = 6.7799e-04
Loss = 1.8688e-01, PNorm = 65.4734, GNorm = 0.6439, lr_0 = 6.7752e-04
Loss = 1.5395e-01, PNorm = 65.4960, GNorm = 0.8471, lr_0 = 6.7706e-04
Loss = 1.8320e-01, PNorm = 65.5138, GNorm = 0.8847, lr_0 = 6.7659e-04
Loss = 1.4942e-01, PNorm = 65.5313, GNorm = 0.6278, lr_0 = 6.7613e-04
Loss = 1.6070e-01, PNorm = 65.5502, GNorm = 0.7624, lr_0 = 6.7567e-04
Loss = 1.6436e-01, PNorm = 65.5716, GNorm = 0.5994, lr_0 = 6.7520e-04
Loss = 1.6773e-01, PNorm = 65.5896, GNorm = 0.9319, lr_0 = 6.7474e-04
Loss = 1.6741e-01, PNorm = 65.6163, GNorm = 0.8670, lr_0 = 6.7428e-04
Loss = 1.8911e-01, PNorm = 65.6385, GNorm = 1.5730, lr_0 = 6.7382e-04
Loss = 1.6303e-01, PNorm = 65.6601, GNorm = 0.6326, lr_0 = 6.7335e-04
Loss = 1.6001e-01, PNorm = 65.6840, GNorm = 1.2920, lr_0 = 6.7289e-04
Loss = 1.5264e-01, PNorm = 65.7037, GNorm = 0.8292, lr_0 = 6.7243e-04
Loss = 1.4425e-01, PNorm = 65.7272, GNorm = 0.8510, lr_0 = 6.7197e-04
Loss = 1.6934e-01, PNorm = 65.7462, GNorm = 0.6167, lr_0 = 6.7151e-04
Loss = 1.8424e-01, PNorm = 65.7737, GNorm = 0.8547, lr_0 = 6.7105e-04
Loss = 1.7537e-01, PNorm = 65.8041, GNorm = 0.5730, lr_0 = 6.7059e-04
Loss = 1.7343e-01, PNorm = 65.8296, GNorm = 0.7110, lr_0 = 6.7013e-04
Loss = 1.5076e-01, PNorm = 65.8525, GNorm = 0.6690, lr_0 = 6.6967e-04
Loss = 1.6852e-01, PNorm = 65.8705, GNorm = 0.8534, lr_0 = 6.6921e-04
Loss = 1.9414e-01, PNorm = 65.8937, GNorm = 1.3095, lr_0 = 6.6876e-04
Loss = 1.7278e-01, PNorm = 65.9170, GNorm = 1.2395, lr_0 = 6.6830e-04
Loss = 1.4383e-01, PNorm = 65.9416, GNorm = 0.8118, lr_0 = 6.6784e-04
Loss = 1.8640e-01, PNorm = 65.9631, GNorm = 0.8608, lr_0 = 6.6738e-04
Loss = 1.7378e-01, PNorm = 65.9878, GNorm = 1.2481, lr_0 = 6.6693e-04
Loss = 1.4798e-01, PNorm = 66.0033, GNorm = 0.7698, lr_0 = 6.6647e-04
Loss = 1.5527e-01, PNorm = 66.0180, GNorm = 0.5431, lr_0 = 6.6601e-04
Loss = 1.5966e-01, PNorm = 66.0394, GNorm = 0.9686, lr_0 = 6.6556e-04
Loss = 1.7423e-01, PNorm = 66.0647, GNorm = 1.1362, lr_0 = 6.6510e-04
Loss = 1.6825e-01, PNorm = 66.0874, GNorm = 1.0557, lr_0 = 6.6464e-04
Loss = 1.5724e-01, PNorm = 66.1114, GNorm = 0.8811, lr_0 = 6.6419e-04
Loss = 1.5891e-01, PNorm = 66.1323, GNorm = 0.6206, lr_0 = 6.6373e-04
Loss = 1.6387e-01, PNorm = 66.1598, GNorm = 1.1540, lr_0 = 6.6328e-04
Loss = 1.6236e-01, PNorm = 66.1788, GNorm = 0.6760, lr_0 = 6.6282e-04
Validation mae = 0.244287
Epoch 7
Loss = 1.4945e-01, PNorm = 66.1982, GNorm = 0.5993, lr_0 = 6.6237e-04
Loss = 1.6484e-01, PNorm = 66.2162, GNorm = 1.3000, lr_0 = 6.6192e-04
Loss = 1.4206e-01, PNorm = 66.2364, GNorm = 0.7429, lr_0 = 6.6146e-04
Loss = 1.2458e-01, PNorm = 66.2527, GNorm = 0.6776, lr_0 = 6.6101e-04
Loss = 1.6671e-01, PNorm = 66.2727, GNorm = 1.1702, lr_0 = 6.6056e-04
Loss = 1.7279e-01, PNorm = 66.3006, GNorm = 0.7964, lr_0 = 6.6011e-04
Loss = 1.5185e-01, PNorm = 66.3225, GNorm = 0.9119, lr_0 = 6.5965e-04
Loss = 1.5031e-01, PNorm = 66.3504, GNorm = 1.4911, lr_0 = 6.5920e-04
Loss = 1.8285e-01, PNorm = 66.3759, GNorm = 1.8041, lr_0 = 6.5875e-04
Loss = 1.8142e-01, PNorm = 66.4072, GNorm = 0.5072, lr_0 = 6.5830e-04
Loss = 1.7468e-01, PNorm = 66.4392, GNorm = 0.8830, lr_0 = 6.5785e-04
Loss = 1.6895e-01, PNorm = 66.4624, GNorm = 0.9413, lr_0 = 6.5740e-04
Loss = 1.5989e-01, PNorm = 66.4868, GNorm = 0.9057, lr_0 = 6.5695e-04
Loss = 1.7056e-01, PNorm = 66.5072, GNorm = 0.8774, lr_0 = 6.5650e-04
Loss = 1.3846e-01, PNorm = 66.5290, GNorm = 0.7879, lr_0 = 6.5605e-04
Loss = 1.4294e-01, PNorm = 66.5490, GNorm = 0.8129, lr_0 = 6.5560e-04
Loss = 1.3546e-01, PNorm = 66.5766, GNorm = 0.6393, lr_0 = 6.5515e-04
Loss = 1.7375e-01, PNorm = 66.5946, GNorm = 1.4172, lr_0 = 6.5470e-04
Loss = 1.7431e-01, PNorm = 66.6179, GNorm = 0.8500, lr_0 = 6.5425e-04
Loss = 1.7231e-01, PNorm = 66.6459, GNorm = 0.9477, lr_0 = 6.5380e-04
Loss = 1.4929e-01, PNorm = 66.6663, GNorm = 0.9848, lr_0 = 6.5335e-04
Loss = 1.6539e-01, PNorm = 66.6912, GNorm = 0.8059, lr_0 = 6.5291e-04
Loss = 1.4387e-01, PNorm = 66.7117, GNorm = 0.8765, lr_0 = 6.5246e-04
Loss = 1.4793e-01, PNorm = 66.7321, GNorm = 0.7043, lr_0 = 6.5201e-04
Loss = 1.4219e-01, PNorm = 66.7490, GNorm = 1.0060, lr_0 = 6.5157e-04
Loss = 1.4966e-01, PNorm = 66.7674, GNorm = 0.6437, lr_0 = 6.5112e-04
Loss = 1.5555e-01, PNorm = 66.7864, GNorm = 0.7209, lr_0 = 6.5067e-04
Loss = 1.4884e-01, PNorm = 66.8001, GNorm = 0.6562, lr_0 = 6.5023e-04
Loss = 1.7177e-01, PNorm = 66.8161, GNorm = 0.7569, lr_0 = 6.4978e-04
Loss = 1.5802e-01, PNorm = 66.8357, GNorm = 1.0239, lr_0 = 6.4934e-04
Loss = 1.6592e-01, PNorm = 66.8545, GNorm = 0.6069, lr_0 = 6.4889e-04
Loss = 1.6375e-01, PNorm = 66.8759, GNorm = 0.8226, lr_0 = 6.4845e-04
Loss = 1.6986e-01, PNorm = 66.8979, GNorm = 1.2406, lr_0 = 6.4800e-04
Loss = 1.4938e-01, PNorm = 66.9277, GNorm = 0.7846, lr_0 = 6.4756e-04
Loss = 1.5091e-01, PNorm = 66.9497, GNorm = 0.5619, lr_0 = 6.4712e-04
Loss = 1.5490e-01, PNorm = 66.9570, GNorm = 0.7673, lr_0 = 6.4667e-04
Loss = 1.5550e-01, PNorm = 66.9783, GNorm = 0.5261, lr_0 = 6.4623e-04
Loss = 1.4123e-01, PNorm = 66.9975, GNorm = 1.0983, lr_0 = 6.4579e-04
Loss = 1.5562e-01, PNorm = 67.0209, GNorm = 0.6213, lr_0 = 6.4534e-04
Loss = 1.3785e-01, PNorm = 67.0449, GNorm = 1.5280, lr_0 = 6.4490e-04
Loss = 1.5462e-01, PNorm = 67.0687, GNorm = 1.0878, lr_0 = 6.4446e-04
Loss = 1.6802e-01, PNorm = 67.0900, GNorm = 0.6933, lr_0 = 6.4402e-04
Loss = 1.5893e-01, PNorm = 67.1121, GNorm = 1.2895, lr_0 = 6.4358e-04
Loss = 1.5451e-01, PNorm = 67.1330, GNorm = 0.5213, lr_0 = 6.4314e-04
Loss = 1.4836e-01, PNorm = 67.1521, GNorm = 0.5468, lr_0 = 6.4270e-04
Loss = 1.4322e-01, PNorm = 67.1759, GNorm = 0.6188, lr_0 = 6.4226e-04
Loss = 1.6261e-01, PNorm = 67.1949, GNorm = 0.8174, lr_0 = 6.4182e-04
Loss = 1.5052e-01, PNorm = 67.2142, GNorm = 0.9528, lr_0 = 6.4138e-04
Loss = 1.6898e-01, PNorm = 67.2338, GNorm = 0.5514, lr_0 = 6.4094e-04
Loss = 1.7286e-01, PNorm = 67.2606, GNorm = 1.4537, lr_0 = 6.4050e-04
Loss = 1.6095e-01, PNorm = 67.2887, GNorm = 0.9481, lr_0 = 6.4006e-04
Loss = 1.5092e-01, PNorm = 67.3084, GNorm = 1.0011, lr_0 = 6.3962e-04
Loss = 1.4666e-01, PNorm = 67.3274, GNorm = 0.5477, lr_0 = 6.3918e-04
Loss = 1.4118e-01, PNorm = 67.3499, GNorm = 0.6401, lr_0 = 6.3874e-04
Loss = 1.4552e-01, PNorm = 67.3723, GNorm = 0.8524, lr_0 = 6.3831e-04
Loss = 1.4921e-01, PNorm = 67.3919, GNorm = 0.7634, lr_0 = 6.3787e-04
Loss = 1.6453e-01, PNorm = 67.4074, GNorm = 1.0047, lr_0 = 6.3743e-04
Loss = 1.5047e-01, PNorm = 67.4242, GNorm = 0.7268, lr_0 = 6.3700e-04
Loss = 1.4194e-01, PNorm = 67.4411, GNorm = 1.0656, lr_0 = 6.3656e-04
Loss = 1.5679e-01, PNorm = 67.4658, GNorm = 0.8211, lr_0 = 6.3612e-04
Loss = 1.6016e-01, PNorm = 67.4870, GNorm = 0.4988, lr_0 = 6.3569e-04
Loss = 1.5344e-01, PNorm = 67.5059, GNorm = 0.6288, lr_0 = 6.3525e-04
Loss = 1.5735e-01, PNorm = 67.5183, GNorm = 0.7467, lr_0 = 6.3482e-04
Loss = 1.6154e-01, PNorm = 67.5344, GNorm = 0.5871, lr_0 = 6.3438e-04
Loss = 1.4699e-01, PNorm = 67.5539, GNorm = 0.8536, lr_0 = 6.3395e-04
Loss = 1.4754e-01, PNorm = 67.5786, GNorm = 0.5955, lr_0 = 6.3351e-04
Loss = 1.3895e-01, PNorm = 67.5984, GNorm = 1.5306, lr_0 = 6.3308e-04
Loss = 1.3776e-01, PNorm = 67.6165, GNorm = 1.2641, lr_0 = 6.3265e-04
Loss = 1.6049e-01, PNorm = 67.6292, GNorm = 0.7774, lr_0 = 6.3221e-04
Loss = 1.4161e-01, PNorm = 67.6538, GNorm = 0.7311, lr_0 = 6.3178e-04
Loss = 2.0093e-01, PNorm = 67.6759, GNorm = 0.7775, lr_0 = 6.3135e-04
Loss = 1.5089e-01, PNorm = 67.7029, GNorm = 0.8607, lr_0 = 6.3091e-04
Loss = 1.6236e-01, PNorm = 67.7244, GNorm = 0.4999, lr_0 = 6.3048e-04
Loss = 1.6067e-01, PNorm = 67.7478, GNorm = 0.5596, lr_0 = 6.3005e-04
Loss = 1.4791e-01, PNorm = 67.7680, GNorm = 0.9894, lr_0 = 6.2962e-04
Loss = 1.4765e-01, PNorm = 67.7826, GNorm = 0.8501, lr_0 = 6.2919e-04
Loss = 1.6433e-01, PNorm = 67.8077, GNorm = 1.0795, lr_0 = 6.2876e-04
Loss = 1.4455e-01, PNorm = 67.8355, GNorm = 0.6569, lr_0 = 6.2833e-04
Loss = 1.7592e-01, PNorm = 67.8655, GNorm = 2.0166, lr_0 = 6.2789e-04
Loss = 1.7430e-01, PNorm = 67.8897, GNorm = 0.8355, lr_0 = 6.2746e-04
Loss = 1.5858e-01, PNorm = 67.9102, GNorm = 0.7180, lr_0 = 6.2703e-04
Loss = 1.7662e-01, PNorm = 67.9295, GNorm = 0.7637, lr_0 = 6.2661e-04
Loss = 1.7310e-01, PNorm = 67.9466, GNorm = 0.6876, lr_0 = 6.2618e-04
Loss = 1.5221e-01, PNorm = 67.9739, GNorm = 1.0192, lr_0 = 6.2575e-04
Loss = 1.5880e-01, PNorm = 67.9921, GNorm = 1.3938, lr_0 = 6.2532e-04
Loss = 1.5396e-01, PNorm = 68.0128, GNorm = 0.6894, lr_0 = 6.2489e-04
Loss = 1.3945e-01, PNorm = 68.0319, GNorm = 0.5360, lr_0 = 6.2446e-04
Loss = 1.5503e-01, PNorm = 68.0547, GNorm = 1.1771, lr_0 = 6.2403e-04
Loss = 1.4012e-01, PNorm = 68.0732, GNorm = 1.0303, lr_0 = 6.2361e-04
Loss = 1.6343e-01, PNorm = 68.0904, GNorm = 0.8561, lr_0 = 6.2318e-04
Loss = 1.6472e-01, PNorm = 68.1122, GNorm = 0.8961, lr_0 = 6.2275e-04
Loss = 1.6534e-01, PNorm = 68.1358, GNorm = 0.5317, lr_0 = 6.2233e-04
Loss = 1.5457e-01, PNorm = 68.1512, GNorm = 1.0663, lr_0 = 6.2190e-04
Loss = 1.7270e-01, PNorm = 68.1674, GNorm = 0.6466, lr_0 = 6.2147e-04
Loss = 1.4529e-01, PNorm = 68.1803, GNorm = 0.7610, lr_0 = 6.2105e-04
Loss = 1.7455e-01, PNorm = 68.2028, GNorm = 1.0753, lr_0 = 6.2062e-04
Loss = 1.5236e-01, PNorm = 68.2175, GNorm = 0.7405, lr_0 = 6.2020e-04
Loss = 1.7449e-01, PNorm = 68.2309, GNorm = 0.9341, lr_0 = 6.1977e-04
Loss = 1.7005e-01, PNorm = 68.2514, GNorm = 0.6306, lr_0 = 6.1935e-04
Loss = 1.5690e-01, PNorm = 68.2722, GNorm = 0.7239, lr_0 = 6.1892e-04
Loss = 1.4374e-01, PNorm = 68.2984, GNorm = 0.7342, lr_0 = 6.1850e-04
Loss = 1.6295e-01, PNorm = 68.3203, GNorm = 1.6768, lr_0 = 6.1808e-04
Loss = 1.7477e-01, PNorm = 68.3475, GNorm = 1.0864, lr_0 = 6.1765e-04
Loss = 1.6411e-01, PNorm = 68.3695, GNorm = 1.5952, lr_0 = 6.1723e-04
Loss = 1.6801e-01, PNorm = 68.3825, GNorm = 0.8459, lr_0 = 6.1681e-04
Loss = 1.5999e-01, PNorm = 68.4027, GNorm = 0.6326, lr_0 = 6.1638e-04
Loss = 1.5395e-01, PNorm = 68.4175, GNorm = 1.1336, lr_0 = 6.1596e-04
Loss = 1.5695e-01, PNorm = 68.4338, GNorm = 0.5169, lr_0 = 6.1554e-04
Loss = 1.3561e-01, PNorm = 68.4595, GNorm = 0.5038, lr_0 = 6.1512e-04
Loss = 1.2861e-01, PNorm = 68.4772, GNorm = 1.0055, lr_0 = 6.1470e-04
Loss = 1.5741e-01, PNorm = 68.4945, GNorm = 1.1436, lr_0 = 6.1428e-04
Loss = 1.8136e-01, PNorm = 68.5195, GNorm = 1.4864, lr_0 = 6.1385e-04
Loss = 1.6658e-01, PNorm = 68.5418, GNorm = 0.9064, lr_0 = 6.1343e-04
Loss = 1.5119e-01, PNorm = 68.5598, GNorm = 0.8904, lr_0 = 6.1301e-04
Loss = 1.2576e-01, PNorm = 68.5813, GNorm = 0.6327, lr_0 = 6.1259e-04
Loss = 1.5729e-01, PNorm = 68.6003, GNorm = 0.6359, lr_0 = 6.1217e-04
Loss = 1.8201e-01, PNorm = 68.6214, GNorm = 1.2768, lr_0 = 6.1175e-04
Loss = 1.4881e-01, PNorm = 68.6397, GNorm = 0.6093, lr_0 = 6.1134e-04
Loss = 1.9191e-01, PNorm = 68.6574, GNorm = 0.4645, lr_0 = 6.1092e-04
Loss = 1.5659e-01, PNorm = 68.6849, GNorm = 0.7682, lr_0 = 6.1050e-04
Validation mae = 0.243313
Epoch 8
Loss = 1.4216e-01, PNorm = 68.7068, GNorm = 0.5305, lr_0 = 6.1008e-04
Loss = 1.6786e-01, PNorm = 68.7349, GNorm = 0.8492, lr_0 = 6.0966e-04
Loss = 1.3874e-01, PNorm = 68.7531, GNorm = 0.6602, lr_0 = 6.0924e-04
Loss = 1.7209e-01, PNorm = 68.7780, GNorm = 0.6622, lr_0 = 6.0883e-04
Loss = 1.5427e-01, PNorm = 68.8020, GNorm = 0.5048, lr_0 = 6.0841e-04
Loss = 1.3186e-01, PNorm = 68.8232, GNorm = 0.6439, lr_0 = 6.0799e-04
Loss = 1.7205e-01, PNorm = 68.8445, GNorm = 1.3943, lr_0 = 6.0758e-04
Loss = 1.4138e-01, PNorm = 68.8617, GNorm = 0.5665, lr_0 = 6.0716e-04
Loss = 1.4877e-01, PNorm = 68.8811, GNorm = 1.4179, lr_0 = 6.0674e-04
Loss = 1.5883e-01, PNorm = 68.8934, GNorm = 0.9239, lr_0 = 6.0633e-04
Loss = 1.5652e-01, PNorm = 68.9159, GNorm = 0.9450, lr_0 = 6.0591e-04
Loss = 1.4107e-01, PNorm = 68.9364, GNorm = 1.0156, lr_0 = 6.0550e-04
Loss = 1.6161e-01, PNorm = 68.9537, GNorm = 0.7712, lr_0 = 6.0508e-04
Loss = 1.3297e-01, PNorm = 68.9714, GNorm = 0.9081, lr_0 = 6.0467e-04
Loss = 1.5049e-01, PNorm = 68.9906, GNorm = 0.5835, lr_0 = 6.0425e-04
Loss = 1.4634e-01, PNorm = 69.0120, GNorm = 0.5558, lr_0 = 6.0384e-04
Loss = 1.4011e-01, PNorm = 69.0341, GNorm = 1.3991, lr_0 = 6.0343e-04
Loss = 1.5372e-01, PNorm = 69.0548, GNorm = 0.8621, lr_0 = 6.0301e-04
Loss = 1.4435e-01, PNorm = 69.0754, GNorm = 1.2582, lr_0 = 6.0260e-04
Loss = 1.4496e-01, PNorm = 69.0917, GNorm = 0.7799, lr_0 = 6.0219e-04
Loss = 1.4313e-01, PNorm = 69.1049, GNorm = 0.9891, lr_0 = 6.0178e-04
Loss = 1.3850e-01, PNorm = 69.1221, GNorm = 1.2299, lr_0 = 6.0136e-04
Loss = 1.6065e-01, PNorm = 69.1444, GNorm = 2.0793, lr_0 = 6.0095e-04
Loss = 1.5138e-01, PNorm = 69.1689, GNorm = 0.9423, lr_0 = 6.0054e-04
Loss = 1.1712e-01, PNorm = 69.1869, GNorm = 0.6491, lr_0 = 6.0013e-04
Loss = 1.5099e-01, PNorm = 69.2066, GNorm = 0.6544, lr_0 = 5.9972e-04
Loss = 1.3585e-01, PNorm = 69.2251, GNorm = 0.6233, lr_0 = 5.9931e-04
Loss = 1.3007e-01, PNorm = 69.2459, GNorm = 0.4386, lr_0 = 5.9890e-04
Loss = 1.2835e-01, PNorm = 69.2578, GNorm = 0.7823, lr_0 = 5.9849e-04
Loss = 1.4582e-01, PNorm = 69.2709, GNorm = 0.8025, lr_0 = 5.9808e-04
Loss = 1.4505e-01, PNorm = 69.2871, GNorm = 0.6043, lr_0 = 5.9767e-04
Loss = 1.6251e-01, PNorm = 69.3061, GNorm = 0.8507, lr_0 = 5.9726e-04
Loss = 1.8268e-01, PNorm = 69.3301, GNorm = 0.7437, lr_0 = 5.9685e-04
Loss = 1.6595e-01, PNorm = 69.3498, GNorm = 1.5434, lr_0 = 5.9644e-04
Loss = 1.6002e-01, PNorm = 69.3694, GNorm = 2.1585, lr_0 = 5.9603e-04
Loss = 1.6231e-01, PNorm = 69.3882, GNorm = 1.1617, lr_0 = 5.9562e-04
Loss = 1.4591e-01, PNorm = 69.4045, GNorm = 0.6961, lr_0 = 5.9521e-04
Loss = 1.4755e-01, PNorm = 69.4191, GNorm = 1.1379, lr_0 = 5.9481e-04
Loss = 1.4159e-01, PNorm = 69.4386, GNorm = 1.1203, lr_0 = 5.9440e-04
Loss = 1.7362e-01, PNorm = 69.4626, GNorm = 0.6323, lr_0 = 5.9399e-04
Loss = 1.8041e-01, PNorm = 69.4875, GNorm = 0.7617, lr_0 = 5.9358e-04
Loss = 1.4831e-01, PNorm = 69.5121, GNorm = 0.6468, lr_0 = 5.9318e-04
Loss = 1.4895e-01, PNorm = 69.5313, GNorm = 0.9685, lr_0 = 5.9277e-04
Loss = 1.5596e-01, PNorm = 69.5545, GNorm = 0.9404, lr_0 = 5.9236e-04
Loss = 1.5306e-01, PNorm = 69.5700, GNorm = 0.5884, lr_0 = 5.9196e-04
Loss = 1.4605e-01, PNorm = 69.5880, GNorm = 0.9325, lr_0 = 5.9155e-04
Loss = 1.4039e-01, PNorm = 69.6098, GNorm = 0.8605, lr_0 = 5.9115e-04
Loss = 1.7372e-01, PNorm = 69.6297, GNorm = 1.0914, lr_0 = 5.9074e-04
Loss = 1.3280e-01, PNorm = 69.6505, GNorm = 0.5237, lr_0 = 5.9034e-04
Loss = 1.5578e-01, PNorm = 69.6663, GNorm = 1.1103, lr_0 = 5.8993e-04
Loss = 1.2879e-01, PNorm = 69.6831, GNorm = 0.6639, lr_0 = 5.8953e-04
Loss = 1.4367e-01, PNorm = 69.6992, GNorm = 1.6768, lr_0 = 5.8913e-04
Loss = 1.5927e-01, PNorm = 69.7174, GNorm = 1.5186, lr_0 = 5.8872e-04
Loss = 1.3936e-01, PNorm = 69.7445, GNorm = 0.5929, lr_0 = 5.8832e-04
Loss = 1.3983e-01, PNorm = 69.7609, GNorm = 0.4873, lr_0 = 5.8792e-04
Loss = 1.3166e-01, PNorm = 69.7719, GNorm = 0.6902, lr_0 = 5.8751e-04
Loss = 1.3668e-01, PNorm = 69.7913, GNorm = 0.6526, lr_0 = 5.8711e-04
Loss = 1.7717e-01, PNorm = 69.8115, GNorm = 0.9321, lr_0 = 5.8671e-04
Loss = 1.4048e-01, PNorm = 69.8287, GNorm = 1.5256, lr_0 = 5.8631e-04
Loss = 1.4905e-01, PNorm = 69.8449, GNorm = 0.9309, lr_0 = 5.8591e-04
Loss = 1.3114e-01, PNorm = 69.8583, GNorm = 1.1990, lr_0 = 5.8550e-04
Loss = 1.4371e-01, PNorm = 69.8707, GNorm = 0.7789, lr_0 = 5.8510e-04
Loss = 1.4690e-01, PNorm = 69.8866, GNorm = 0.3657, lr_0 = 5.8470e-04
Loss = 1.3654e-01, PNorm = 69.8980, GNorm = 0.6320, lr_0 = 5.8430e-04
Loss = 1.5741e-01, PNorm = 69.9230, GNorm = 0.8703, lr_0 = 5.8390e-04
Loss = 1.7215e-01, PNorm = 69.9428, GNorm = 0.5471, lr_0 = 5.8350e-04
Loss = 1.4911e-01, PNorm = 69.9661, GNorm = 0.7886, lr_0 = 5.8310e-04
Loss = 1.3119e-01, PNorm = 69.9874, GNorm = 0.5676, lr_0 = 5.8270e-04
Loss = 1.4519e-01, PNorm = 70.0078, GNorm = 1.0961, lr_0 = 5.8230e-04
Loss = 1.3520e-01, PNorm = 70.0227, GNorm = 0.5985, lr_0 = 5.8190e-04
Loss = 1.4810e-01, PNorm = 70.0409, GNorm = 0.9322, lr_0 = 5.8151e-04
Loss = 1.6492e-01, PNorm = 70.0546, GNorm = 0.9318, lr_0 = 5.8111e-04
Loss = 1.4694e-01, PNorm = 70.0769, GNorm = 1.4105, lr_0 = 5.8071e-04
Loss = 1.6338e-01, PNorm = 70.0990, GNorm = 1.5008, lr_0 = 5.8031e-04
Loss = 1.4959e-01, PNorm = 70.1234, GNorm = 0.6807, lr_0 = 5.7991e-04
Loss = 1.4567e-01, PNorm = 70.1403, GNorm = 0.7324, lr_0 = 5.7952e-04
Loss = 1.5467e-01, PNorm = 70.1552, GNorm = 0.7575, lr_0 = 5.7912e-04
Loss = 1.5974e-01, PNorm = 70.1703, GNorm = 0.6511, lr_0 = 5.7872e-04
Loss = 1.5759e-01, PNorm = 70.1908, GNorm = 0.7261, lr_0 = 5.7833e-04
Loss = 1.6110e-01, PNorm = 70.2085, GNorm = 1.3916, lr_0 = 5.7793e-04
Loss = 1.4991e-01, PNorm = 70.2267, GNorm = 0.8705, lr_0 = 5.7753e-04
Loss = 1.6428e-01, PNorm = 70.2359, GNorm = 0.8641, lr_0 = 5.7714e-04
Loss = 1.5909e-01, PNorm = 70.2449, GNorm = 0.8429, lr_0 = 5.7674e-04
Loss = 1.6378e-01, PNorm = 70.2598, GNorm = 1.3233, lr_0 = 5.7635e-04
Loss = 1.4965e-01, PNorm = 70.2785, GNorm = 0.7771, lr_0 = 5.7595e-04
Loss = 1.4047e-01, PNorm = 70.2920, GNorm = 0.5723, lr_0 = 5.7556e-04
Loss = 1.4623e-01, PNorm = 70.3045, GNorm = 0.9975, lr_0 = 5.7516e-04
Loss = 1.4611e-01, PNorm = 70.3198, GNorm = 0.7258, lr_0 = 5.7477e-04
Loss = 1.3279e-01, PNorm = 70.3378, GNorm = 0.7247, lr_0 = 5.7438e-04
Loss = 1.4193e-01, PNorm = 70.3542, GNorm = 0.7659, lr_0 = 5.7398e-04
Loss = 1.4903e-01, PNorm = 70.3695, GNorm = 0.6453, lr_0 = 5.7359e-04
Loss = 1.4568e-01, PNorm = 70.3851, GNorm = 0.9194, lr_0 = 5.7320e-04
Loss = 1.3018e-01, PNorm = 70.4062, GNorm = 0.5438, lr_0 = 5.7280e-04
Loss = 1.3901e-01, PNorm = 70.4217, GNorm = 0.7985, lr_0 = 5.7241e-04
Loss = 1.4436e-01, PNorm = 70.4367, GNorm = 0.9848, lr_0 = 5.7202e-04
Loss = 1.4106e-01, PNorm = 70.4494, GNorm = 0.6015, lr_0 = 5.7163e-04
Loss = 1.3392e-01, PNorm = 70.4637, GNorm = 0.8132, lr_0 = 5.7124e-04
Loss = 1.4637e-01, PNorm = 70.4817, GNorm = 0.9682, lr_0 = 5.7084e-04
Loss = 1.4045e-01, PNorm = 70.5008, GNorm = 1.5994, lr_0 = 5.7045e-04
Loss = 1.3586e-01, PNorm = 70.5216, GNorm = 0.8133, lr_0 = 5.7006e-04
Loss = 1.5159e-01, PNorm = 70.5354, GNorm = 1.2875, lr_0 = 5.6967e-04
Loss = 1.3912e-01, PNorm = 70.5499, GNorm = 0.6985, lr_0 = 5.6928e-04
Loss = 1.3556e-01, PNorm = 70.5611, GNorm = 0.6839, lr_0 = 5.6889e-04
Loss = 1.4853e-01, PNorm = 70.5759, GNorm = 1.2149, lr_0 = 5.6850e-04
Loss = 1.4979e-01, PNorm = 70.5963, GNorm = 0.8552, lr_0 = 5.6811e-04
Loss = 1.5948e-01, PNorm = 70.6126, GNorm = 0.8847, lr_0 = 5.6772e-04
Loss = 1.5603e-01, PNorm = 70.6308, GNorm = 0.5478, lr_0 = 5.6733e-04
Loss = 1.4347e-01, PNorm = 70.6447, GNorm = 0.6046, lr_0 = 5.6695e-04
Loss = 1.6068e-01, PNorm = 70.6622, GNorm = 1.1305, lr_0 = 5.6656e-04
Loss = 1.5528e-01, PNorm = 70.6811, GNorm = 0.8129, lr_0 = 5.6617e-04
Loss = 1.4549e-01, PNorm = 70.7043, GNorm = 1.0530, lr_0 = 5.6578e-04
Loss = 1.4199e-01, PNorm = 70.7216, GNorm = 0.9504, lr_0 = 5.6539e-04
Loss = 1.4543e-01, PNorm = 70.7410, GNorm = 0.6091, lr_0 = 5.6501e-04
Loss = 1.6124e-01, PNorm = 70.7532, GNorm = 0.8421, lr_0 = 5.6462e-04
Loss = 1.4622e-01, PNorm = 70.7682, GNorm = 0.8806, lr_0 = 5.6423e-04
Loss = 1.2273e-01, PNorm = 70.7862, GNorm = 0.7616, lr_0 = 5.6385e-04
Loss = 1.3889e-01, PNorm = 70.7986, GNorm = 0.7896, lr_0 = 5.6346e-04
Loss = 1.6179e-01, PNorm = 70.8119, GNorm = 1.5941, lr_0 = 5.6307e-04
Loss = 1.5089e-01, PNorm = 70.8337, GNorm = 0.5663, lr_0 = 5.6269e-04
Loss = 1.5188e-01, PNorm = 70.8524, GNorm = 0.6278, lr_0 = 5.6230e-04
Validation mae = 0.239242
Epoch 9
Loss = 1.3680e-01, PNorm = 70.8717, GNorm = 0.9862, lr_0 = 5.6192e-04
Loss = 1.4549e-01, PNorm = 70.8937, GNorm = 0.8803, lr_0 = 5.6153e-04
Loss = 1.1837e-01, PNorm = 70.9166, GNorm = 0.5345, lr_0 = 5.6115e-04
Loss = 1.2209e-01, PNorm = 70.9306, GNorm = 0.8491, lr_0 = 5.6076e-04
Loss = 1.3039e-01, PNorm = 70.9464, GNorm = 0.6272, lr_0 = 5.6038e-04
Loss = 1.4159e-01, PNorm = 70.9645, GNorm = 0.5941, lr_0 = 5.6000e-04
Loss = 1.2689e-01, PNorm = 70.9795, GNorm = 0.5631, lr_0 = 5.5961e-04
Loss = 1.1625e-01, PNorm = 70.9937, GNorm = 0.9996, lr_0 = 5.5923e-04
Loss = 1.7104e-01, PNorm = 71.0095, GNorm = 0.6527, lr_0 = 5.5885e-04
Loss = 1.3804e-01, PNorm = 71.0278, GNorm = 0.6599, lr_0 = 5.5846e-04
Loss = 1.4973e-01, PNorm = 71.0493, GNorm = 1.5279, lr_0 = 5.5808e-04
Loss = 1.1644e-01, PNorm = 71.0651, GNorm = 0.5338, lr_0 = 5.5770e-04
Loss = 1.3053e-01, PNorm = 71.0803, GNorm = 1.0759, lr_0 = 5.5732e-04
Loss = 1.3735e-01, PNorm = 71.0982, GNorm = 0.7421, lr_0 = 5.5693e-04
Loss = 1.1288e-01, PNorm = 71.1148, GNorm = 0.5840, lr_0 = 5.5655e-04
Loss = 1.4014e-01, PNorm = 71.1312, GNorm = 1.2820, lr_0 = 5.5617e-04
Loss = 1.4376e-01, PNorm = 71.1470, GNorm = 0.5600, lr_0 = 5.5579e-04
Loss = 1.4535e-01, PNorm = 71.1697, GNorm = 1.0065, lr_0 = 5.5541e-04
Loss = 1.3120e-01, PNorm = 71.1862, GNorm = 0.4820, lr_0 = 5.5503e-04
Loss = 1.7003e-01, PNorm = 71.2082, GNorm = 1.4340, lr_0 = 5.5465e-04
Loss = 1.3876e-01, PNorm = 71.2383, GNorm = 0.7202, lr_0 = 5.5427e-04
Loss = 1.2934e-01, PNorm = 71.2578, GNorm = 0.6912, lr_0 = 5.5389e-04
Loss = 1.2074e-01, PNorm = 71.2745, GNorm = 0.6549, lr_0 = 5.5351e-04
Loss = 1.2746e-01, PNorm = 71.2829, GNorm = 1.1252, lr_0 = 5.5313e-04
Loss = 1.5446e-01, PNorm = 71.3030, GNorm = 0.8834, lr_0 = 5.5275e-04
Loss = 1.6229e-01, PNorm = 71.3091, GNorm = 0.4978, lr_0 = 5.5237e-04
Loss = 1.4939e-01, PNorm = 71.3229, GNorm = 1.7665, lr_0 = 5.5199e-04
Loss = 1.4509e-01, PNorm = 71.3414, GNorm = 0.8645, lr_0 = 5.5162e-04
Loss = 1.3840e-01, PNorm = 71.3585, GNorm = 0.5881, lr_0 = 5.5124e-04
Loss = 1.2638e-01, PNorm = 71.3743, GNorm = 0.5682, lr_0 = 5.5086e-04
Loss = 1.4756e-01, PNorm = 71.3887, GNorm = 0.6871, lr_0 = 5.5048e-04
Loss = 1.1902e-01, PNorm = 71.4051, GNorm = 0.7333, lr_0 = 5.5011e-04
Loss = 1.3919e-01, PNorm = 71.4189, GNorm = 0.8194, lr_0 = 5.4973e-04
Loss = 1.3980e-01, PNorm = 71.4350, GNorm = 0.6873, lr_0 = 5.4935e-04
Loss = 1.5354e-01, PNorm = 71.4577, GNorm = 1.1902, lr_0 = 5.4898e-04
Loss = 1.4883e-01, PNorm = 71.4734, GNorm = 1.0984, lr_0 = 5.4860e-04
Loss = 1.4222e-01, PNorm = 71.4955, GNorm = 0.6303, lr_0 = 5.4822e-04
Loss = 1.1855e-01, PNorm = 71.5154, GNorm = 0.4676, lr_0 = 5.4785e-04
Loss = 1.3442e-01, PNorm = 71.5325, GNorm = 0.6848, lr_0 = 5.4747e-04
Loss = 1.3601e-01, PNorm = 71.5491, GNorm = 0.8176, lr_0 = 5.4710e-04
Loss = 1.2176e-01, PNorm = 71.5612, GNorm = 0.5626, lr_0 = 5.4672e-04
Loss = 1.3538e-01, PNorm = 71.5750, GNorm = 0.7756, lr_0 = 5.4635e-04
Loss = 1.4581e-01, PNorm = 71.5891, GNorm = 0.9191, lr_0 = 5.4597e-04
Loss = 1.4387e-01, PNorm = 71.6000, GNorm = 0.7008, lr_0 = 5.4560e-04
Loss = 1.1928e-01, PNorm = 71.6139, GNorm = 0.7308, lr_0 = 5.4523e-04
Loss = 1.4185e-01, PNorm = 71.6332, GNorm = 0.9465, lr_0 = 5.4485e-04
Loss = 1.3633e-01, PNorm = 71.6594, GNorm = 0.5893, lr_0 = 5.4448e-04
Loss = 1.4206e-01, PNorm = 71.6868, GNorm = 1.5038, lr_0 = 5.4411e-04
Loss = 1.3531e-01, PNorm = 71.7066, GNorm = 0.9328, lr_0 = 5.4373e-04
Loss = 1.6421e-01, PNorm = 71.7260, GNorm = 0.6889, lr_0 = 5.4336e-04
Loss = 1.2546e-01, PNorm = 71.7471, GNorm = 0.5449, lr_0 = 5.4299e-04
Loss = 1.4485e-01, PNorm = 71.7587, GNorm = 0.6669, lr_0 = 5.4262e-04
Loss = 1.3715e-01, PNorm = 71.7654, GNorm = 0.5465, lr_0 = 5.4225e-04
Loss = 1.5320e-01, PNorm = 71.7741, GNorm = 0.8504, lr_0 = 5.4187e-04
Loss = 1.4104e-01, PNorm = 71.7864, GNorm = 0.5836, lr_0 = 5.4150e-04
Loss = 1.5730e-01, PNorm = 71.8065, GNorm = 0.8082, lr_0 = 5.4113e-04
Loss = 1.4149e-01, PNorm = 71.8259, GNorm = 0.5900, lr_0 = 5.4076e-04
Loss = 1.4722e-01, PNorm = 71.8508, GNorm = 0.8143, lr_0 = 5.4039e-04
Loss = 1.4403e-01, PNorm = 71.8692, GNorm = 0.6766, lr_0 = 5.4002e-04
Loss = 1.4317e-01, PNorm = 71.8865, GNorm = 0.6335, lr_0 = 5.3965e-04
Loss = 1.4643e-01, PNorm = 71.8997, GNorm = 0.7170, lr_0 = 5.3928e-04
Loss = 1.4208e-01, PNorm = 71.9112, GNorm = 0.7631, lr_0 = 5.3891e-04
Loss = 1.2653e-01, PNorm = 71.9258, GNorm = 1.4154, lr_0 = 5.3854e-04
Loss = 1.3266e-01, PNorm = 71.9412, GNorm = 0.7660, lr_0 = 5.3817e-04
Loss = 1.3634e-01, PNorm = 71.9592, GNorm = 0.6561, lr_0 = 5.3781e-04
Loss = 1.4421e-01, PNorm = 71.9714, GNorm = 0.5301, lr_0 = 5.3744e-04
Loss = 1.4295e-01, PNorm = 71.9846, GNorm = 1.5629, lr_0 = 5.3707e-04
Loss = 1.5644e-01, PNorm = 71.9970, GNorm = 0.6852, lr_0 = 5.3670e-04
Loss = 1.3533e-01, PNorm = 72.0137, GNorm = 0.6559, lr_0 = 5.3633e-04
Loss = 1.4088e-01, PNorm = 72.0283, GNorm = 1.2626, lr_0 = 5.3597e-04
Loss = 1.4099e-01, PNorm = 72.0366, GNorm = 1.1083, lr_0 = 5.3560e-04
Loss = 1.3101e-01, PNorm = 72.0486, GNorm = 0.7996, lr_0 = 5.3523e-04
Loss = 1.3707e-01, PNorm = 72.0676, GNorm = 0.9807, lr_0 = 5.3486e-04
Loss = 1.3249e-01, PNorm = 72.0868, GNorm = 0.9766, lr_0 = 5.3450e-04
Loss = 1.3737e-01, PNorm = 72.1029, GNorm = 1.1224, lr_0 = 5.3413e-04
Loss = 1.6110e-01, PNorm = 72.1177, GNorm = 0.6442, lr_0 = 5.3377e-04
Loss = 1.2885e-01, PNorm = 72.1309, GNorm = 0.5300, lr_0 = 5.3340e-04
Loss = 1.4737e-01, PNorm = 72.1437, GNorm = 0.5938, lr_0 = 5.3304e-04
Loss = 1.4470e-01, PNorm = 72.1582, GNorm = 0.8450, lr_0 = 5.3267e-04
Loss = 1.4680e-01, PNorm = 72.1718, GNorm = 0.9381, lr_0 = 5.3231e-04
Loss = 1.3810e-01, PNorm = 72.1905, GNorm = 1.1404, lr_0 = 5.3194e-04
Loss = 1.6873e-01, PNorm = 72.2108, GNorm = 0.7578, lr_0 = 5.3158e-04
Loss = 1.4885e-01, PNorm = 72.2318, GNorm = 0.6909, lr_0 = 5.3121e-04
Loss = 1.3613e-01, PNorm = 72.2498, GNorm = 0.5666, lr_0 = 5.3085e-04
Loss = 1.5024e-01, PNorm = 72.2638, GNorm = 0.6357, lr_0 = 5.3048e-04
Loss = 1.4221e-01, PNorm = 72.2797, GNorm = 0.7299, lr_0 = 5.3012e-04
Loss = 1.5441e-01, PNorm = 72.2913, GNorm = 0.6726, lr_0 = 5.2976e-04
Loss = 1.4095e-01, PNorm = 72.3032, GNorm = 0.5349, lr_0 = 5.2939e-04
Loss = 1.4628e-01, PNorm = 72.3145, GNorm = 1.2934, lr_0 = 5.2903e-04
Loss = 1.2372e-01, PNorm = 72.3292, GNorm = 0.8799, lr_0 = 5.2867e-04
Loss = 1.3331e-01, PNorm = 72.3492, GNorm = 0.7396, lr_0 = 5.2831e-04
Loss = 1.3840e-01, PNorm = 72.3650, GNorm = 0.7622, lr_0 = 5.2795e-04
Loss = 1.5870e-01, PNorm = 72.3811, GNorm = 1.0955, lr_0 = 5.2758e-04
Loss = 1.3660e-01, PNorm = 72.3999, GNorm = 0.6540, lr_0 = 5.2722e-04
Loss = 1.5012e-01, PNorm = 72.4150, GNorm = 1.9327, lr_0 = 5.2686e-04
Loss = 1.4426e-01, PNorm = 72.4340, GNorm = 1.0318, lr_0 = 5.2650e-04
Loss = 1.4534e-01, PNorm = 72.4528, GNorm = 0.6089, lr_0 = 5.2614e-04
Loss = 1.3181e-01, PNorm = 72.4660, GNorm = 0.6416, lr_0 = 5.2578e-04
Loss = 1.7915e-01, PNorm = 72.4832, GNorm = 0.7767, lr_0 = 5.2542e-04
Loss = 1.4331e-01, PNorm = 72.5044, GNorm = 0.6723, lr_0 = 5.2506e-04
Loss = 1.2826e-01, PNorm = 72.5213, GNorm = 0.9877, lr_0 = 5.2470e-04
Loss = 1.3870e-01, PNorm = 72.5365, GNorm = 0.8633, lr_0 = 5.2434e-04
Loss = 1.5136e-01, PNorm = 72.5431, GNorm = 0.5933, lr_0 = 5.2398e-04
Loss = 1.5156e-01, PNorm = 72.5585, GNorm = 0.7728, lr_0 = 5.2362e-04
Loss = 1.3627e-01, PNorm = 72.5689, GNorm = 0.8392, lr_0 = 5.2326e-04
Loss = 1.4187e-01, PNorm = 72.5829, GNorm = 0.6483, lr_0 = 5.2290e-04
Loss = 1.3785e-01, PNorm = 72.5989, GNorm = 0.7087, lr_0 = 5.2255e-04
Loss = 1.4687e-01, PNorm = 72.6164, GNorm = 0.6267, lr_0 = 5.2219e-04
Loss = 1.4957e-01, PNorm = 72.6389, GNorm = 0.8119, lr_0 = 5.2183e-04
Loss = 1.5331e-01, PNorm = 72.6529, GNorm = 0.6941, lr_0 = 5.2147e-04
Loss = 1.2264e-01, PNorm = 72.6687, GNorm = 0.8500, lr_0 = 5.2112e-04
Loss = 1.3397e-01, PNorm = 72.6895, GNorm = 0.5357, lr_0 = 5.2076e-04
Loss = 1.2798e-01, PNorm = 72.7057, GNorm = 0.6538, lr_0 = 5.2040e-04
Loss = 1.5171e-01, PNorm = 72.7209, GNorm = 0.7737, lr_0 = 5.2005e-04
Loss = 1.4533e-01, PNorm = 72.7356, GNorm = 0.7684, lr_0 = 5.1969e-04
Loss = 1.3154e-01, PNorm = 72.7452, GNorm = 1.2161, lr_0 = 5.1933e-04
Loss = 1.6592e-01, PNorm = 72.7602, GNorm = 0.6743, lr_0 = 5.1898e-04
Loss = 1.1630e-01, PNorm = 72.7729, GNorm = 0.5983, lr_0 = 5.1862e-04
Loss = 1.3493e-01, PNorm = 72.7919, GNorm = 0.6029, lr_0 = 5.1827e-04
Loss = 1.6389e-01, PNorm = 72.8028, GNorm = 1.0040, lr_0 = 5.1791e-04
Validation mae = 0.240500
Epoch 10
Loss = 1.0593e-01, PNorm = 72.8184, GNorm = 0.6582, lr_0 = 5.1756e-04
Loss = 1.2945e-01, PNorm = 72.8310, GNorm = 0.9717, lr_0 = 5.1720e-04
Loss = 1.3227e-01, PNorm = 72.8419, GNorm = 0.5756, lr_0 = 5.1685e-04
Loss = 1.4169e-01, PNorm = 72.8564, GNorm = 0.8946, lr_0 = 5.1649e-04
Loss = 1.4542e-01, PNorm = 72.8736, GNorm = 0.7037, lr_0 = 5.1614e-04
Loss = 1.2398e-01, PNorm = 72.8909, GNorm = 0.7776, lr_0 = 5.1579e-04
Loss = 1.2700e-01, PNorm = 72.9071, GNorm = 1.0305, lr_0 = 5.1543e-04
Loss = 1.4506e-01, PNorm = 72.9249, GNorm = 0.5658, lr_0 = 5.1508e-04
Loss = 1.4525e-01, PNorm = 72.9349, GNorm = 0.5370, lr_0 = 5.1473e-04
Loss = 1.4287e-01, PNorm = 72.9551, GNorm = 0.6451, lr_0 = 5.1437e-04
Loss = 1.3773e-01, PNorm = 72.9739, GNorm = 1.0784, lr_0 = 5.1402e-04
Loss = 1.6763e-01, PNorm = 72.9893, GNorm = 0.7633, lr_0 = 5.1367e-04
Loss = 1.3889e-01, PNorm = 73.0018, GNorm = 0.9896, lr_0 = 5.1332e-04
Loss = 1.3172e-01, PNorm = 73.0203, GNorm = 1.2686, lr_0 = 5.1297e-04
Loss = 1.3141e-01, PNorm = 73.0385, GNorm = 0.6436, lr_0 = 5.1262e-04
Loss = 1.5623e-01, PNorm = 73.0560, GNorm = 0.5978, lr_0 = 5.1226e-04
Loss = 1.2684e-01, PNorm = 73.0792, GNorm = 0.6533, lr_0 = 5.1191e-04
Loss = 1.2364e-01, PNorm = 73.1026, GNorm = 0.6322, lr_0 = 5.1156e-04
Loss = 1.1642e-01, PNorm = 73.1150, GNorm = 0.7475, lr_0 = 5.1121e-04
Loss = 1.4121e-01, PNorm = 73.1230, GNorm = 0.7693, lr_0 = 5.1086e-04
Loss = 1.2700e-01, PNorm = 73.1355, GNorm = 0.9842, lr_0 = 5.1051e-04
Loss = 1.4341e-01, PNorm = 73.1478, GNorm = 1.1022, lr_0 = 5.1016e-04
Loss = 1.4658e-01, PNorm = 73.1651, GNorm = 0.6800, lr_0 = 5.0981e-04
Loss = 1.2314e-01, PNorm = 73.1824, GNorm = 1.2488, lr_0 = 5.0946e-04
Loss = 1.2503e-01, PNorm = 73.2012, GNorm = 1.0137, lr_0 = 5.0911e-04
Loss = 1.2601e-01, PNorm = 73.2173, GNorm = 0.5315, lr_0 = 5.0877e-04
Loss = 1.2005e-01, PNorm = 73.2300, GNorm = 0.7899, lr_0 = 5.0842e-04
Loss = 1.2613e-01, PNorm = 73.2417, GNorm = 0.6592, lr_0 = 5.0807e-04
Loss = 1.2517e-01, PNorm = 73.2554, GNorm = 0.5981, lr_0 = 5.0772e-04
Loss = 1.2778e-01, PNorm = 73.2655, GNorm = 0.9386, lr_0 = 5.0737e-04
Loss = 1.3827e-01, PNorm = 73.2847, GNorm = 0.6571, lr_0 = 5.0703e-04
Loss = 1.3691e-01, PNorm = 73.3052, GNorm = 0.5589, lr_0 = 5.0668e-04
Loss = 1.2063e-01, PNorm = 73.3156, GNorm = 0.6423, lr_0 = 5.0633e-04
Loss = 1.3469e-01, PNorm = 73.3299, GNorm = 0.8109, lr_0 = 5.0598e-04
Loss = 1.3081e-01, PNorm = 73.3420, GNorm = 0.6315, lr_0 = 5.0564e-04
Loss = 1.4053e-01, PNorm = 73.3596, GNorm = 0.6390, lr_0 = 5.0529e-04
Loss = 1.4126e-01, PNorm = 73.3770, GNorm = 0.6708, lr_0 = 5.0494e-04
Loss = 1.2672e-01, PNorm = 73.3907, GNorm = 0.7769, lr_0 = 5.0460e-04
Loss = 1.2602e-01, PNorm = 73.4025, GNorm = 0.6117, lr_0 = 5.0425e-04
Loss = 1.4578e-01, PNorm = 73.4176, GNorm = 1.0413, lr_0 = 5.0391e-04
Loss = 1.3296e-01, PNorm = 73.4370, GNorm = 1.2455, lr_0 = 5.0356e-04
Loss = 1.4821e-01, PNorm = 73.4537, GNorm = 1.4099, lr_0 = 5.0322e-04
Loss = 1.2825e-01, PNorm = 73.4723, GNorm = 0.5689, lr_0 = 5.0287e-04
Loss = 1.3608e-01, PNorm = 73.4885, GNorm = 0.5614, lr_0 = 5.0253e-04
Loss = 1.3860e-01, PNorm = 73.5001, GNorm = 0.8666, lr_0 = 5.0218e-04
Loss = 1.4561e-01, PNorm = 73.5112, GNorm = 1.0398, lr_0 = 5.0184e-04
Loss = 1.2641e-01, PNorm = 73.5253, GNorm = 1.0980, lr_0 = 5.0150e-04
Loss = 1.3015e-01, PNorm = 73.5377, GNorm = 1.2477, lr_0 = 5.0115e-04
Loss = 1.3598e-01, PNorm = 73.5502, GNorm = 0.8655, lr_0 = 5.0081e-04
Loss = 1.3308e-01, PNorm = 73.5649, GNorm = 0.8441, lr_0 = 5.0047e-04
Loss = 1.2760e-01, PNorm = 73.5814, GNorm = 0.7067, lr_0 = 5.0012e-04
Loss = 1.2528e-01, PNorm = 73.5972, GNorm = 0.6763, lr_0 = 4.9978e-04
Loss = 1.3551e-01, PNorm = 73.6083, GNorm = 0.8096, lr_0 = 4.9944e-04
Loss = 1.3454e-01, PNorm = 73.6221, GNorm = 0.6196, lr_0 = 4.9910e-04
Loss = 1.2095e-01, PNorm = 73.6321, GNorm = 0.5775, lr_0 = 4.9875e-04
Loss = 1.2769e-01, PNorm = 73.6476, GNorm = 0.5529, lr_0 = 4.9841e-04
Loss = 1.5030e-01, PNorm = 73.6617, GNorm = 0.7395, lr_0 = 4.9807e-04
Loss = 1.2890e-01, PNorm = 73.6758, GNorm = 0.5161, lr_0 = 4.9773e-04
Loss = 1.2350e-01, PNorm = 73.6857, GNorm = 0.7432, lr_0 = 4.9739e-04
Loss = 1.1953e-01, PNorm = 73.6991, GNorm = 0.5552, lr_0 = 4.9705e-04
Loss = 1.3257e-01, PNorm = 73.7115, GNorm = 0.6687, lr_0 = 4.9671e-04
Loss = 1.1777e-01, PNorm = 73.7201, GNorm = 1.0718, lr_0 = 4.9637e-04
Loss = 1.1797e-01, PNorm = 73.7328, GNorm = 0.5282, lr_0 = 4.9603e-04
Loss = 1.1962e-01, PNorm = 73.7455, GNorm = 0.4282, lr_0 = 4.9569e-04
Loss = 1.3636e-01, PNorm = 73.7612, GNorm = 0.8379, lr_0 = 4.9535e-04
Loss = 1.3121e-01, PNorm = 73.7768, GNorm = 0.9051, lr_0 = 4.9501e-04
Loss = 1.2440e-01, PNorm = 73.7914, GNorm = 0.9546, lr_0 = 4.9467e-04
Loss = 1.3916e-01, PNorm = 73.8011, GNorm = 0.6239, lr_0 = 4.9433e-04
Loss = 1.4127e-01, PNorm = 73.8133, GNorm = 1.2216, lr_0 = 4.9399e-04
Loss = 1.3536e-01, PNorm = 73.8241, GNorm = 0.7750, lr_0 = 4.9365e-04
Loss = 1.3158e-01, PNorm = 73.8337, GNorm = 0.5714, lr_0 = 4.9332e-04
Loss = 1.3874e-01, PNorm = 73.8450, GNorm = 0.5688, lr_0 = 4.9298e-04
Loss = 1.3284e-01, PNorm = 73.8571, GNorm = 1.0639, lr_0 = 4.9264e-04
Loss = 1.4146e-01, PNorm = 73.8725, GNorm = 0.7788, lr_0 = 4.9230e-04
Loss = 1.5893e-01, PNorm = 73.8946, GNorm = 0.7799, lr_0 = 4.9197e-04
Loss = 1.4453e-01, PNorm = 73.9145, GNorm = 0.6301, lr_0 = 4.9163e-04
Loss = 1.3978e-01, PNorm = 73.9304, GNorm = 0.6108, lr_0 = 4.9129e-04
Loss = 1.2228e-01, PNorm = 73.9440, GNorm = 0.7460, lr_0 = 4.9095e-04
Loss = 1.4352e-01, PNorm = 73.9608, GNorm = 0.6317, lr_0 = 4.9062e-04
Loss = 1.3853e-01, PNorm = 73.9816, GNorm = 0.8786, lr_0 = 4.9028e-04
Loss = 1.4697e-01, PNorm = 73.9925, GNorm = 0.9821, lr_0 = 4.8995e-04
Loss = 1.3539e-01, PNorm = 74.0092, GNorm = 0.8348, lr_0 = 4.8961e-04
Loss = 1.4615e-01, PNorm = 74.0241, GNorm = 0.8425, lr_0 = 4.8928e-04
Loss = 1.3548e-01, PNorm = 74.0390, GNorm = 0.5965, lr_0 = 4.8894e-04
Loss = 1.3285e-01, PNorm = 74.0515, GNorm = 0.8271, lr_0 = 4.8861e-04
Loss = 1.3769e-01, PNorm = 74.0615, GNorm = 0.9037, lr_0 = 4.8827e-04
Loss = 1.3283e-01, PNorm = 74.0717, GNorm = 0.7076, lr_0 = 4.8794e-04
Loss = 1.3452e-01, PNorm = 74.0840, GNorm = 0.8188, lr_0 = 4.8760e-04
Loss = 1.4089e-01, PNorm = 74.0998, GNorm = 0.6728, lr_0 = 4.8727e-04
Loss = 1.2910e-01, PNorm = 74.1131, GNorm = 1.1157, lr_0 = 4.8693e-04
Loss = 1.2026e-01, PNorm = 74.1239, GNorm = 0.7402, lr_0 = 4.8660e-04
Loss = 1.3536e-01, PNorm = 74.1355, GNorm = 0.5614, lr_0 = 4.8627e-04
Loss = 1.3797e-01, PNorm = 74.1519, GNorm = 0.9499, lr_0 = 4.8593e-04
Loss = 1.2856e-01, PNorm = 74.1691, GNorm = 1.1388, lr_0 = 4.8560e-04
Loss = 1.2349e-01, PNorm = 74.1779, GNorm = 0.6513, lr_0 = 4.8527e-04
Loss = 1.5297e-01, PNorm = 74.1889, GNorm = 1.4548, lr_0 = 4.8494e-04
Loss = 1.5284e-01, PNorm = 74.2001, GNorm = 0.8337, lr_0 = 4.8460e-04
Loss = 1.2513e-01, PNorm = 74.2160, GNorm = 0.5086, lr_0 = 4.8427e-04
Loss = 1.5689e-01, PNorm = 74.2336, GNorm = 0.7130, lr_0 = 4.8394e-04
Loss = 1.3522e-01, PNorm = 74.2543, GNorm = 0.6648, lr_0 = 4.8361e-04
Loss = 1.4105e-01, PNorm = 74.2671, GNorm = 1.2853, lr_0 = 4.8328e-04
Loss = 1.4035e-01, PNorm = 74.2765, GNorm = 0.7815, lr_0 = 4.8295e-04
Loss = 1.5246e-01, PNorm = 74.2925, GNorm = 0.6060, lr_0 = 4.8262e-04
Loss = 1.3172e-01, PNorm = 74.3085, GNorm = 0.7145, lr_0 = 4.8228e-04
Loss = 1.3931e-01, PNorm = 74.3203, GNorm = 1.4484, lr_0 = 4.8195e-04
Loss = 1.4307e-01, PNorm = 74.3352, GNorm = 0.9154, lr_0 = 4.8162e-04
Loss = 1.3527e-01, PNorm = 74.3480, GNorm = 0.5353, lr_0 = 4.8129e-04
Loss = 1.3570e-01, PNorm = 74.3613, GNorm = 0.5106, lr_0 = 4.8096e-04
Loss = 1.2336e-01, PNorm = 74.3729, GNorm = 1.1572, lr_0 = 4.8064e-04
Loss = 1.3675e-01, PNorm = 74.3811, GNorm = 0.8148, lr_0 = 4.8031e-04
Loss = 1.2317e-01, PNorm = 74.4001, GNorm = 1.1993, lr_0 = 4.7998e-04
Loss = 1.2013e-01, PNorm = 74.4092, GNorm = 0.7244, lr_0 = 4.7965e-04
Loss = 1.2931e-01, PNorm = 74.4251, GNorm = 0.9373, lr_0 = 4.7932e-04
Loss = 1.4082e-01, PNorm = 74.4392, GNorm = 0.6701, lr_0 = 4.7899e-04
Loss = 1.3973e-01, PNorm = 74.4526, GNorm = 0.7371, lr_0 = 4.7866e-04
Loss = 1.4156e-01, PNorm = 74.4609, GNorm = 0.6294, lr_0 = 4.7833e-04
Loss = 1.4991e-01, PNorm = 74.4740, GNorm = 0.6253, lr_0 = 4.7801e-04
Loss = 1.5201e-01, PNorm = 74.4848, GNorm = 0.8193, lr_0 = 4.7768e-04
Loss = 1.3480e-01, PNorm = 74.4933, GNorm = 0.8715, lr_0 = 4.7735e-04
Loss = 1.2799e-01, PNorm = 74.5024, GNorm = 0.5699, lr_0 = 4.7703e-04
Validation mae = 0.233945
Epoch 11
Loss = 1.1160e-01, PNorm = 74.5161, GNorm = 0.7322, lr_0 = 4.7670e-04
Loss = 1.2308e-01, PNorm = 74.5268, GNorm = 1.0585, lr_0 = 4.7637e-04
Loss = 1.3630e-01, PNorm = 74.5405, GNorm = 0.7488, lr_0 = 4.7605e-04
Loss = 1.3544e-01, PNorm = 74.5618, GNorm = 0.6831, lr_0 = 4.7572e-04
Loss = 1.2263e-01, PNorm = 74.5791, GNorm = 0.6175, lr_0 = 4.7539e-04
Loss = 1.2309e-01, PNorm = 74.5969, GNorm = 0.6890, lr_0 = 4.7507e-04
Loss = 1.2636e-01, PNorm = 74.6051, GNorm = 0.5186, lr_0 = 4.7474e-04
Loss = 1.3156e-01, PNorm = 74.6217, GNorm = 0.5195, lr_0 = 4.7442e-04
Loss = 1.1712e-01, PNorm = 74.6390, GNorm = 0.6628, lr_0 = 4.7409e-04
Loss = 1.3319e-01, PNorm = 74.6497, GNorm = 0.5513, lr_0 = 4.7377e-04
Loss = 1.4315e-01, PNorm = 74.6651, GNorm = 0.9831, lr_0 = 4.7344e-04
Loss = 1.1997e-01, PNorm = 74.6831, GNorm = 1.0101, lr_0 = 4.7312e-04
Loss = 1.3472e-01, PNorm = 74.6959, GNorm = 0.8150, lr_0 = 4.7279e-04
Loss = 1.4310e-01, PNorm = 74.7102, GNorm = 1.2710, lr_0 = 4.7247e-04
Loss = 1.5223e-01, PNorm = 74.7243, GNorm = 0.9943, lr_0 = 4.7215e-04
Loss = 1.3369e-01, PNorm = 74.7355, GNorm = 1.1227, lr_0 = 4.7182e-04
Loss = 1.1586e-01, PNorm = 74.7467, GNorm = 0.9102, lr_0 = 4.7150e-04
Loss = 1.3506e-01, PNorm = 74.7651, GNorm = 0.7930, lr_0 = 4.7118e-04
Loss = 1.4653e-01, PNorm = 74.7851, GNorm = 1.0984, lr_0 = 4.7085e-04
Loss = 1.3752e-01, PNorm = 74.8079, GNorm = 0.6613, lr_0 = 4.7053e-04
Loss = 1.2339e-01, PNorm = 74.8223, GNorm = 0.5734, lr_0 = 4.7021e-04
Loss = 1.2672e-01, PNorm = 74.8379, GNorm = 0.8570, lr_0 = 4.6989e-04
Loss = 1.3704e-01, PNorm = 74.8534, GNorm = 0.4187, lr_0 = 4.6957e-04
Loss = 1.2598e-01, PNorm = 74.8678, GNorm = 1.0902, lr_0 = 4.6924e-04
Loss = 1.3179e-01, PNorm = 74.8818, GNorm = 0.9214, lr_0 = 4.6892e-04
Loss = 1.2344e-01, PNorm = 74.8972, GNorm = 0.7144, lr_0 = 4.6860e-04
Loss = 1.1993e-01, PNorm = 74.9109, GNorm = 0.4701, lr_0 = 4.6828e-04
Loss = 1.2142e-01, PNorm = 74.9209, GNorm = 0.5717, lr_0 = 4.6796e-04
Loss = 1.1502e-01, PNorm = 74.9330, GNorm = 0.8562, lr_0 = 4.6764e-04
Loss = 1.2993e-01, PNorm = 74.9410, GNorm = 0.8127, lr_0 = 4.6732e-04
Loss = 1.2762e-01, PNorm = 74.9600, GNorm = 0.8922, lr_0 = 4.6700e-04
Loss = 1.2583e-01, PNorm = 74.9794, GNorm = 0.7657, lr_0 = 4.6668e-04
Loss = 1.3291e-01, PNorm = 74.9963, GNorm = 0.7776, lr_0 = 4.6636e-04
Loss = 1.3973e-01, PNorm = 75.0170, GNorm = 0.8318, lr_0 = 4.6604e-04
Loss = 1.2080e-01, PNorm = 75.0321, GNorm = 0.6264, lr_0 = 4.6572e-04
Loss = 1.2188e-01, PNorm = 75.0414, GNorm = 0.9981, lr_0 = 4.6540e-04
Loss = 1.2098e-01, PNorm = 75.0485, GNorm = 0.8722, lr_0 = 4.6508e-04
Loss = 1.3138e-01, PNorm = 75.0603, GNorm = 1.6528, lr_0 = 4.6476e-04
Loss = 1.1312e-01, PNorm = 75.0736, GNorm = 0.5130, lr_0 = 4.6445e-04
Loss = 1.2047e-01, PNorm = 75.0851, GNorm = 1.0843, lr_0 = 4.6413e-04
Loss = 1.2192e-01, PNorm = 75.0929, GNorm = 1.1972, lr_0 = 4.6381e-04
Loss = 1.3262e-01, PNorm = 75.1035, GNorm = 0.6293, lr_0 = 4.6349e-04
Loss = 1.2573e-01, PNorm = 75.1160, GNorm = 0.5328, lr_0 = 4.6317e-04
Loss = 1.2784e-01, PNorm = 75.1264, GNorm = 0.6435, lr_0 = 4.6286e-04
Loss = 1.2565e-01, PNorm = 75.1366, GNorm = 0.5904, lr_0 = 4.6254e-04
Loss = 1.1810e-01, PNorm = 75.1498, GNorm = 0.5619, lr_0 = 4.6222e-04
Loss = 1.2077e-01, PNorm = 75.1584, GNorm = 0.9101, lr_0 = 4.6191e-04
Loss = 1.2250e-01, PNorm = 75.1670, GNorm = 0.8129, lr_0 = 4.6159e-04
Loss = 1.3490e-01, PNorm = 75.1822, GNorm = 0.9634, lr_0 = 4.6127e-04
Loss = 1.2992e-01, PNorm = 75.1950, GNorm = 1.0047, lr_0 = 4.6096e-04
Loss = 1.1364e-01, PNorm = 75.2108, GNorm = 0.7460, lr_0 = 4.6064e-04
Loss = 1.2641e-01, PNorm = 75.2254, GNorm = 0.7874, lr_0 = 4.6033e-04
Loss = 1.2311e-01, PNorm = 75.2410, GNorm = 0.6874, lr_0 = 4.6001e-04
Loss = 1.4841e-01, PNorm = 75.2595, GNorm = 0.7479, lr_0 = 4.5970e-04
Loss = 1.1388e-01, PNorm = 75.2693, GNorm = 0.4677, lr_0 = 4.5938e-04
Loss = 1.3092e-01, PNorm = 75.2852, GNorm = 0.7523, lr_0 = 4.5907e-04
Loss = 1.1571e-01, PNorm = 75.2993, GNorm = 0.4830, lr_0 = 4.5875e-04
Loss = 1.3171e-01, PNorm = 75.3111, GNorm = 0.5576, lr_0 = 4.5844e-04
Loss = 1.2944e-01, PNorm = 75.3237, GNorm = 0.8956, lr_0 = 4.5812e-04
Loss = 1.4769e-01, PNorm = 75.3360, GNorm = 1.1378, lr_0 = 4.5781e-04
Loss = 1.3560e-01, PNorm = 75.3510, GNorm = 1.0365, lr_0 = 4.5750e-04
Loss = 1.2033e-01, PNorm = 75.3626, GNorm = 0.5392, lr_0 = 4.5718e-04
Loss = 1.2267e-01, PNorm = 75.3777, GNorm = 0.7460, lr_0 = 4.5687e-04
Loss = 1.1847e-01, PNorm = 75.3896, GNorm = 0.5342, lr_0 = 4.5656e-04
Loss = 1.3097e-01, PNorm = 75.4044, GNorm = 0.6819, lr_0 = 4.5624e-04
Loss = 1.3752e-01, PNorm = 75.4142, GNorm = 0.7258, lr_0 = 4.5593e-04
Loss = 1.3111e-01, PNorm = 75.4237, GNorm = 1.0796, lr_0 = 4.5562e-04
Loss = 1.3161e-01, PNorm = 75.4383, GNorm = 0.6756, lr_0 = 4.5531e-04
Loss = 1.2571e-01, PNorm = 75.4506, GNorm = 1.5384, lr_0 = 4.5499e-04
Loss = 1.5585e-01, PNorm = 75.4613, GNorm = 1.3900, lr_0 = 4.5468e-04
Loss = 1.4027e-01, PNorm = 75.4786, GNorm = 0.7138, lr_0 = 4.5437e-04
Loss = 1.4493e-01, PNorm = 75.4920, GNorm = 0.8535, lr_0 = 4.5406e-04
Loss = 1.3089e-01, PNorm = 75.5088, GNorm = 0.7665, lr_0 = 4.5375e-04
Loss = 1.6376e-01, PNorm = 75.5239, GNorm = 0.8154, lr_0 = 4.5344e-04
Loss = 1.1997e-01, PNorm = 75.5404, GNorm = 0.6083, lr_0 = 4.5313e-04
Loss = 1.2248e-01, PNorm = 75.5565, GNorm = 0.5392, lr_0 = 4.5282e-04
Loss = 1.2409e-01, PNorm = 75.5676, GNorm = 0.7438, lr_0 = 4.5251e-04
Loss = 1.2526e-01, PNorm = 75.5785, GNorm = 0.7059, lr_0 = 4.5220e-04
Loss = 1.2822e-01, PNorm = 75.5868, GNorm = 0.7037, lr_0 = 4.5189e-04
Loss = 1.1520e-01, PNorm = 75.5984, GNorm = 1.0516, lr_0 = 4.5158e-04
Loss = 1.1945e-01, PNorm = 75.6127, GNorm = 0.7493, lr_0 = 4.5127e-04
Loss = 1.2520e-01, PNorm = 75.6251, GNorm = 0.3968, lr_0 = 4.5096e-04
Loss = 1.3175e-01, PNorm = 75.6378, GNorm = 0.6006, lr_0 = 4.5065e-04
Loss = 1.3504e-01, PNorm = 75.6464, GNorm = 0.7913, lr_0 = 4.5034e-04
Loss = 1.2033e-01, PNorm = 75.6580, GNorm = 0.7648, lr_0 = 4.5003e-04
Loss = 1.0891e-01, PNorm = 75.6708, GNorm = 0.6131, lr_0 = 4.4972e-04
Loss = 1.2776e-01, PNorm = 75.6780, GNorm = 0.5356, lr_0 = 4.4942e-04
Loss = 1.1450e-01, PNorm = 75.6836, GNorm = 0.8881, lr_0 = 4.4911e-04
Loss = 1.3041e-01, PNorm = 75.6959, GNorm = 0.7354, lr_0 = 4.4880e-04
Loss = 1.1550e-01, PNorm = 75.7067, GNorm = 1.2777, lr_0 = 4.4849e-04
Loss = 1.2455e-01, PNorm = 75.7197, GNorm = 1.0509, lr_0 = 4.4819e-04
Loss = 1.3349e-01, PNorm = 75.7345, GNorm = 0.5427, lr_0 = 4.4788e-04
Loss = 1.3391e-01, PNorm = 75.7470, GNorm = 1.6871, lr_0 = 4.4757e-04
Loss = 1.3474e-01, PNorm = 75.7621, GNorm = 0.6981, lr_0 = 4.4727e-04
Loss = 1.4440e-01, PNorm = 75.7741, GNorm = 0.5923, lr_0 = 4.4696e-04
Loss = 1.3759e-01, PNorm = 75.7869, GNorm = 0.5712, lr_0 = 4.4665e-04
Loss = 1.1958e-01, PNorm = 75.7959, GNorm = 0.7782, lr_0 = 4.4635e-04
Loss = 1.4132e-01, PNorm = 75.8081, GNorm = 1.3785, lr_0 = 4.4604e-04
Loss = 1.3364e-01, PNorm = 75.8206, GNorm = 0.6293, lr_0 = 4.4574e-04
Loss = 1.4983e-01, PNorm = 75.8376, GNorm = 1.3660, lr_0 = 4.4543e-04
Loss = 1.4002e-01, PNorm = 75.8515, GNorm = 0.9158, lr_0 = 4.4513e-04
Loss = 1.2926e-01, PNorm = 75.8663, GNorm = 1.0356, lr_0 = 4.4482e-04
Loss = 1.5096e-01, PNorm = 75.8829, GNorm = 1.0704, lr_0 = 4.4452e-04
Loss = 1.3044e-01, PNorm = 75.9001, GNorm = 0.6729, lr_0 = 4.4421e-04
Loss = 1.3455e-01, PNorm = 75.9148, GNorm = 0.7697, lr_0 = 4.4391e-04
Loss = 1.2848e-01, PNorm = 75.9271, GNorm = 0.5935, lr_0 = 4.4360e-04
Loss = 1.0248e-01, PNorm = 75.9379, GNorm = 0.4059, lr_0 = 4.4330e-04
Loss = 1.4503e-01, PNorm = 75.9488, GNorm = 0.8770, lr_0 = 4.4299e-04
Loss = 1.3100e-01, PNorm = 75.9590, GNorm = 0.4870, lr_0 = 4.4269e-04
Loss = 1.3684e-01, PNorm = 75.9732, GNorm = 0.6558, lr_0 = 4.4239e-04
Loss = 1.3434e-01, PNorm = 75.9848, GNorm = 0.8678, lr_0 = 4.4209e-04
Loss = 1.2530e-01, PNorm = 75.9965, GNorm = 0.7329, lr_0 = 4.4178e-04
Loss = 1.2180e-01, PNorm = 76.0072, GNorm = 0.6738, lr_0 = 4.4148e-04
Loss = 1.1083e-01, PNorm = 76.0229, GNorm = 0.6839, lr_0 = 4.4118e-04
Loss = 1.2870e-01, PNorm = 76.0293, GNorm = 0.7365, lr_0 = 4.4088e-04
Loss = 1.1867e-01, PNorm = 76.0396, GNorm = 0.6171, lr_0 = 4.4057e-04
Loss = 1.2373e-01, PNorm = 76.0491, GNorm = 0.7738, lr_0 = 4.4027e-04
Loss = 1.3901e-01, PNorm = 76.0548, GNorm = 0.7997, lr_0 = 4.3997e-04
Loss = 1.2207e-01, PNorm = 76.0664, GNorm = 0.5680, lr_0 = 4.3967e-04
Loss = 1.4590e-01, PNorm = 76.0773, GNorm = 0.6373, lr_0 = 4.3937e-04
Validation mae = 0.234266
Epoch 12
Loss = 1.0386e-01, PNorm = 76.0921, GNorm = 0.6069, lr_0 = 4.3907e-04
Loss = 1.3943e-01, PNorm = 76.1054, GNorm = 0.7574, lr_0 = 4.3877e-04
Loss = 1.1147e-01, PNorm = 76.1203, GNorm = 0.6676, lr_0 = 4.3846e-04
Loss = 1.1854e-01, PNorm = 76.1319, GNorm = 0.7423, lr_0 = 4.3816e-04
Loss = 1.2980e-01, PNorm = 76.1472, GNorm = 0.8253, lr_0 = 4.3786e-04
Loss = 1.3092e-01, PNorm = 76.1619, GNorm = 0.7035, lr_0 = 4.3756e-04
Loss = 1.2767e-01, PNorm = 76.1804, GNorm = 0.6993, lr_0 = 4.3726e-04
Loss = 1.2754e-01, PNorm = 76.1949, GNorm = 0.8697, lr_0 = 4.3696e-04
Loss = 1.1175e-01, PNorm = 76.2045, GNorm = 0.7451, lr_0 = 4.3667e-04
Loss = 1.1604e-01, PNorm = 76.2162, GNorm = 0.6451, lr_0 = 4.3637e-04
Loss = 1.1198e-01, PNorm = 76.2287, GNorm = 0.5692, lr_0 = 4.3607e-04
Loss = 1.2902e-01, PNorm = 76.2424, GNorm = 0.6658, lr_0 = 4.3577e-04
Loss = 1.0770e-01, PNorm = 76.2562, GNorm = 0.8151, lr_0 = 4.3547e-04
Loss = 9.0721e-02, PNorm = 76.2686, GNorm = 1.0141, lr_0 = 4.3517e-04
Loss = 1.2071e-01, PNorm = 76.2779, GNorm = 0.8102, lr_0 = 4.3487e-04
Loss = 1.2340e-01, PNorm = 76.2885, GNorm = 0.6391, lr_0 = 4.3458e-04
Loss = 1.1784e-01, PNorm = 76.2997, GNorm = 0.5205, lr_0 = 4.3428e-04
Loss = 1.2154e-01, PNorm = 76.3153, GNorm = 0.9131, lr_0 = 4.3398e-04
Loss = 1.1406e-01, PNorm = 76.3259, GNorm = 0.5739, lr_0 = 4.3368e-04
Loss = 1.2094e-01, PNorm = 76.3365, GNorm = 0.5550, lr_0 = 4.3339e-04
Loss = 1.2960e-01, PNorm = 76.3469, GNorm = 0.6328, lr_0 = 4.3309e-04
Loss = 1.1874e-01, PNorm = 76.3596, GNorm = 0.5851, lr_0 = 4.3279e-04
Loss = 1.3444e-01, PNorm = 76.3742, GNorm = 0.5409, lr_0 = 4.3250e-04
Loss = 1.2422e-01, PNorm = 76.3867, GNorm = 0.6549, lr_0 = 4.3220e-04
Loss = 1.2975e-01, PNorm = 76.3962, GNorm = 0.5203, lr_0 = 4.3190e-04
Loss = 1.0741e-01, PNorm = 76.4113, GNorm = 0.7749, lr_0 = 4.3161e-04
Loss = 1.0574e-01, PNorm = 76.4251, GNorm = 0.6214, lr_0 = 4.3131e-04
Loss = 1.1919e-01, PNorm = 76.4383, GNorm = 0.5280, lr_0 = 4.3102e-04
Loss = 1.2684e-01, PNorm = 76.4502, GNorm = 0.9735, lr_0 = 4.3072e-04
Loss = 1.2296e-01, PNorm = 76.4607, GNorm = 1.0193, lr_0 = 4.3043e-04
Loss = 1.1885e-01, PNorm = 76.4718, GNorm = 0.5688, lr_0 = 4.3013e-04
Loss = 1.2489e-01, PNorm = 76.4862, GNorm = 0.8969, lr_0 = 4.2984e-04
Loss = 1.3271e-01, PNorm = 76.4981, GNorm = 0.8849, lr_0 = 4.2954e-04
Loss = 1.2468e-01, PNorm = 76.5063, GNorm = 0.7987, lr_0 = 4.2925e-04
Loss = 1.1647e-01, PNorm = 76.5177, GNorm = 0.4999, lr_0 = 4.2895e-04
Loss = 1.1479e-01, PNorm = 76.5290, GNorm = 0.6364, lr_0 = 4.2866e-04
Loss = 1.3046e-01, PNorm = 76.5349, GNorm = 0.6525, lr_0 = 4.2837e-04
Loss = 1.1081e-01, PNorm = 76.5478, GNorm = 0.5877, lr_0 = 4.2807e-04
Loss = 1.2501e-01, PNorm = 76.5603, GNorm = 0.9136, lr_0 = 4.2778e-04
Loss = 1.2390e-01, PNorm = 76.5736, GNorm = 0.8264, lr_0 = 4.2749e-04
Loss = 1.3413e-01, PNorm = 76.5802, GNorm = 0.7617, lr_0 = 4.2719e-04
Loss = 1.3034e-01, PNorm = 76.5870, GNorm = 0.9768, lr_0 = 4.2690e-04
Loss = 1.1301e-01, PNorm = 76.6020, GNorm = 0.4850, lr_0 = 4.2661e-04
Loss = 1.1920e-01, PNorm = 76.6203, GNorm = 0.5729, lr_0 = 4.2632e-04
Loss = 1.2971e-01, PNorm = 76.6301, GNorm = 0.9172, lr_0 = 4.2602e-04
Loss = 1.3493e-01, PNorm = 76.6432, GNorm = 0.7043, lr_0 = 4.2573e-04
Loss = 1.1208e-01, PNorm = 76.6558, GNorm = 1.0466, lr_0 = 4.2544e-04
Loss = 1.1456e-01, PNorm = 76.6659, GNorm = 0.6670, lr_0 = 4.2515e-04
Loss = 1.2933e-01, PNorm = 76.6779, GNorm = 0.5641, lr_0 = 4.2486e-04
Loss = 1.3275e-01, PNorm = 76.6881, GNorm = 0.5344, lr_0 = 4.2457e-04
Loss = 1.3708e-01, PNorm = 76.7007, GNorm = 0.7627, lr_0 = 4.2428e-04
Loss = 1.1412e-01, PNorm = 76.7145, GNorm = 0.6690, lr_0 = 4.2399e-04
Loss = 1.2321e-01, PNorm = 76.7258, GNorm = 0.5785, lr_0 = 4.2370e-04
Loss = 1.1486e-01, PNorm = 76.7364, GNorm = 1.0541, lr_0 = 4.2340e-04
Loss = 1.1903e-01, PNorm = 76.7495, GNorm = 0.7210, lr_0 = 4.2311e-04
Loss = 1.2521e-01, PNorm = 76.7559, GNorm = 0.8361, lr_0 = 4.2283e-04
Loss = 1.2147e-01, PNorm = 76.7656, GNorm = 0.6957, lr_0 = 4.2254e-04
Loss = 1.2394e-01, PNorm = 76.7767, GNorm = 0.6673, lr_0 = 4.2225e-04
Loss = 1.1790e-01, PNorm = 76.7910, GNorm = 0.9800, lr_0 = 4.2196e-04
Loss = 1.1314e-01, PNorm = 76.8036, GNorm = 1.0140, lr_0 = 4.2167e-04
Loss = 1.2046e-01, PNorm = 76.8154, GNorm = 0.6207, lr_0 = 4.2138e-04
Loss = 1.1993e-01, PNorm = 76.8226, GNorm = 0.6921, lr_0 = 4.2109e-04
Loss = 9.9736e-02, PNorm = 76.8317, GNorm = 0.5528, lr_0 = 4.2080e-04
Loss = 1.0803e-01, PNorm = 76.8449, GNorm = 0.7097, lr_0 = 4.2051e-04
Loss = 1.2909e-01, PNorm = 76.8539, GNorm = 0.7177, lr_0 = 4.2023e-04
Loss = 1.2471e-01, PNorm = 76.8600, GNorm = 1.3178, lr_0 = 4.1994e-04
Loss = 1.2027e-01, PNorm = 76.8717, GNorm = 1.0601, lr_0 = 4.1965e-04
Loss = 1.4155e-01, PNorm = 76.8817, GNorm = 0.6906, lr_0 = 4.1936e-04
Loss = 1.2507e-01, PNorm = 76.8950, GNorm = 0.8142, lr_0 = 4.1907e-04
Loss = 1.2007e-01, PNorm = 76.9079, GNorm = 0.5710, lr_0 = 4.1879e-04
Loss = 1.2178e-01, PNorm = 76.9193, GNorm = 0.8676, lr_0 = 4.1850e-04
Loss = 1.3518e-01, PNorm = 76.9292, GNorm = 0.6715, lr_0 = 4.1821e-04
Loss = 1.4322e-01, PNorm = 76.9428, GNorm = 0.8266, lr_0 = 4.1793e-04
Loss = 1.1459e-01, PNorm = 76.9529, GNorm = 0.5665, lr_0 = 4.1764e-04
Loss = 1.3751e-01, PNorm = 76.9631, GNorm = 0.9150, lr_0 = 4.1736e-04
Loss = 1.3777e-01, PNorm = 76.9749, GNorm = 0.5118, lr_0 = 4.1707e-04
Loss = 1.3173e-01, PNorm = 76.9863, GNorm = 0.7036, lr_0 = 4.1678e-04
Loss = 1.3468e-01, PNorm = 76.9965, GNorm = 0.9144, lr_0 = 4.1650e-04
Loss = 1.2466e-01, PNorm = 77.0050, GNorm = 0.5930, lr_0 = 4.1621e-04
Loss = 1.2221e-01, PNorm = 77.0116, GNorm = 0.6809, lr_0 = 4.1593e-04
Loss = 1.2292e-01, PNorm = 77.0214, GNorm = 0.7903, lr_0 = 4.1564e-04
Loss = 1.0886e-01, PNorm = 77.0284, GNorm = 0.5537, lr_0 = 4.1536e-04
Loss = 1.3752e-01, PNorm = 77.0390, GNorm = 0.6697, lr_0 = 4.1507e-04
Loss = 1.2750e-01, PNorm = 77.0534, GNorm = 0.7224, lr_0 = 4.1479e-04
Loss = 1.2287e-01, PNorm = 77.0657, GNorm = 0.6574, lr_0 = 4.1450e-04
Loss = 1.2601e-01, PNorm = 77.0729, GNorm = 0.7422, lr_0 = 4.1422e-04
Loss = 1.2336e-01, PNorm = 77.0834, GNorm = 0.6638, lr_0 = 4.1394e-04
Loss = 1.1964e-01, PNorm = 77.0962, GNorm = 0.9576, lr_0 = 4.1365e-04
Loss = 1.2308e-01, PNorm = 77.1079, GNorm = 0.4824, lr_0 = 4.1337e-04
Loss = 1.1095e-01, PNorm = 77.1166, GNorm = 0.4551, lr_0 = 4.1309e-04
Loss = 1.1407e-01, PNorm = 77.1264, GNorm = 0.6373, lr_0 = 4.1280e-04
Loss = 1.1295e-01, PNorm = 77.1356, GNorm = 0.5994, lr_0 = 4.1252e-04
Loss = 1.2575e-01, PNorm = 77.1455, GNorm = 0.4836, lr_0 = 4.1224e-04
Loss = 1.3811e-01, PNorm = 77.1570, GNorm = 0.6339, lr_0 = 4.1196e-04
Loss = 1.1879e-01, PNorm = 77.1644, GNorm = 0.5681, lr_0 = 4.1167e-04
Loss = 1.2422e-01, PNorm = 77.1750, GNorm = 0.7702, lr_0 = 4.1139e-04
Loss = 1.2722e-01, PNorm = 77.1827, GNorm = 0.7896, lr_0 = 4.1111e-04
Loss = 1.2234e-01, PNorm = 77.1918, GNorm = 1.0766, lr_0 = 4.1083e-04
Loss = 1.2506e-01, PNorm = 77.2019, GNorm = 0.5957, lr_0 = 4.1055e-04
Loss = 1.1022e-01, PNorm = 77.2123, GNorm = 0.5204, lr_0 = 4.1027e-04
Loss = 1.2483e-01, PNorm = 77.2211, GNorm = 0.6262, lr_0 = 4.0998e-04
Loss = 1.3426e-01, PNorm = 77.2327, GNorm = 0.5630, lr_0 = 4.0970e-04
Loss = 1.0962e-01, PNorm = 77.2444, GNorm = 0.7158, lr_0 = 4.0942e-04
Loss = 1.2186e-01, PNorm = 77.2553, GNorm = 0.6941, lr_0 = 4.0914e-04
Loss = 1.2813e-01, PNorm = 77.2674, GNorm = 0.8799, lr_0 = 4.0886e-04
Loss = 1.2117e-01, PNorm = 77.2791, GNorm = 0.7160, lr_0 = 4.0858e-04
Loss = 1.2698e-01, PNorm = 77.2878, GNorm = 1.0723, lr_0 = 4.0830e-04
Loss = 1.3124e-01, PNorm = 77.2929, GNorm = 0.6245, lr_0 = 4.0802e-04
Loss = 1.1125e-01, PNorm = 77.3025, GNorm = 0.5082, lr_0 = 4.0774e-04
Loss = 1.0866e-01, PNorm = 77.3068, GNorm = 0.7942, lr_0 = 4.0746e-04
Loss = 1.2460e-01, PNorm = 77.3167, GNorm = 0.9115, lr_0 = 4.0718e-04
Loss = 1.2215e-01, PNorm = 77.3261, GNorm = 0.5557, lr_0 = 4.0691e-04
Loss = 1.2736e-01, PNorm = 77.3365, GNorm = 0.6706, lr_0 = 4.0663e-04
Loss = 1.0169e-01, PNorm = 77.3435, GNorm = 0.7325, lr_0 = 4.0635e-04
Loss = 1.2246e-01, PNorm = 77.3553, GNorm = 0.6794, lr_0 = 4.0607e-04
Loss = 1.2364e-01, PNorm = 77.3634, GNorm = 0.7026, lr_0 = 4.0579e-04
Loss = 1.2587e-01, PNorm = 77.3682, GNorm = 0.8123, lr_0 = 4.0551e-04
Loss = 1.1396e-01, PNorm = 77.3778, GNorm = 1.1569, lr_0 = 4.0524e-04
Loss = 1.1978e-01, PNorm = 77.3860, GNorm = 0.5787, lr_0 = 4.0496e-04
Loss = 1.0917e-01, PNorm = 77.3997, GNorm = 0.8518, lr_0 = 4.0468e-04
Validation mae = 0.237123
Epoch 13
Loss = 1.0074e-01, PNorm = 77.4131, GNorm = 0.4419, lr_0 = 4.0440e-04
Loss = 1.2456e-01, PNorm = 77.4250, GNorm = 0.4752, lr_0 = 4.0413e-04
Loss = 1.1786e-01, PNorm = 77.4358, GNorm = 0.5592, lr_0 = 4.0385e-04
Loss = 1.2068e-01, PNorm = 77.4476, GNorm = 0.9641, lr_0 = 4.0357e-04
Loss = 1.0406e-01, PNorm = 77.4614, GNorm = 0.5289, lr_0 = 4.0330e-04
Loss = 1.1865e-01, PNorm = 77.4717, GNorm = 0.9381, lr_0 = 4.0302e-04
Loss = 1.1974e-01, PNorm = 77.4821, GNorm = 0.8678, lr_0 = 4.0274e-04
Loss = 1.1321e-01, PNorm = 77.4939, GNorm = 0.6181, lr_0 = 4.0247e-04
Loss = 1.1118e-01, PNorm = 77.5044, GNorm = 0.7196, lr_0 = 4.0219e-04
Loss = 1.0894e-01, PNorm = 77.5134, GNorm = 0.6712, lr_0 = 4.0192e-04
Loss = 1.2079e-01, PNorm = 77.5245, GNorm = 0.5434, lr_0 = 4.0164e-04
Loss = 1.4890e-01, PNorm = 77.5398, GNorm = 0.8079, lr_0 = 4.0137e-04
Loss = 1.1017e-01, PNorm = 77.5561, GNorm = 0.7513, lr_0 = 4.0109e-04
Loss = 9.4853e-02, PNorm = 77.5671, GNorm = 0.5867, lr_0 = 4.0082e-04
Loss = 1.1487e-01, PNorm = 77.5793, GNorm = 0.6461, lr_0 = 4.0054e-04
Loss = 1.1539e-01, PNorm = 77.5872, GNorm = 0.6706, lr_0 = 4.0027e-04
Loss = 1.0839e-01, PNorm = 77.5961, GNorm = 0.6469, lr_0 = 3.9999e-04
Loss = 1.1899e-01, PNorm = 77.6028, GNorm = 1.1014, lr_0 = 3.9972e-04
Loss = 1.2826e-01, PNorm = 77.6132, GNorm = 0.6545, lr_0 = 3.9945e-04
Loss = 1.0947e-01, PNorm = 77.6242, GNorm = 1.3172, lr_0 = 3.9917e-04
Loss = 1.5285e-01, PNorm = 77.6384, GNorm = 0.6119, lr_0 = 3.9890e-04
Loss = 1.1693e-01, PNorm = 77.6507, GNorm = 0.4650, lr_0 = 3.9863e-04
Loss = 1.2228e-01, PNorm = 77.6628, GNorm = 0.9187, lr_0 = 3.9835e-04
Loss = 1.1527e-01, PNorm = 77.6759, GNorm = 0.6383, lr_0 = 3.9808e-04
Loss = 1.1267e-01, PNorm = 77.6914, GNorm = 0.6375, lr_0 = 3.9781e-04
Loss = 1.0022e-01, PNorm = 77.7006, GNorm = 0.5564, lr_0 = 3.9753e-04
Loss = 1.1695e-01, PNorm = 77.7135, GNorm = 0.9351, lr_0 = 3.9726e-04
Loss = 1.1394e-01, PNorm = 77.7222, GNorm = 0.6954, lr_0 = 3.9699e-04
Loss = 1.1511e-01, PNorm = 77.7329, GNorm = 0.6112, lr_0 = 3.9672e-04
Loss = 1.2618e-01, PNorm = 77.7433, GNorm = 1.2191, lr_0 = 3.9645e-04
Loss = 1.2421e-01, PNorm = 77.7501, GNorm = 0.7139, lr_0 = 3.9617e-04
Loss = 1.1291e-01, PNorm = 77.7628, GNorm = 0.6681, lr_0 = 3.9590e-04
Loss = 1.1603e-01, PNorm = 77.7768, GNorm = 0.8691, lr_0 = 3.9563e-04
Loss = 1.0571e-01, PNorm = 77.7820, GNorm = 0.4075, lr_0 = 3.9536e-04
Loss = 1.2646e-01, PNorm = 77.7926, GNorm = 0.7657, lr_0 = 3.9509e-04
Loss = 1.1776e-01, PNorm = 77.8057, GNorm = 0.9135, lr_0 = 3.9482e-04
Loss = 1.1386e-01, PNorm = 77.8103, GNorm = 0.7742, lr_0 = 3.9455e-04
Loss = 1.1765e-01, PNorm = 77.8187, GNorm = 0.5476, lr_0 = 3.9428e-04
Loss = 1.1100e-01, PNorm = 77.8296, GNorm = 0.5128, lr_0 = 3.9401e-04
Loss = 1.1395e-01, PNorm = 77.8391, GNorm = 0.5888, lr_0 = 3.9374e-04
Loss = 1.0556e-01, PNorm = 77.8486, GNorm = 0.7081, lr_0 = 3.9347e-04
Loss = 9.9338e-02, PNorm = 77.8549, GNorm = 0.5086, lr_0 = 3.9320e-04
Loss = 1.2781e-01, PNorm = 77.8645, GNorm = 0.5340, lr_0 = 3.9293e-04
Loss = 1.1876e-01, PNorm = 77.8785, GNorm = 1.2379, lr_0 = 3.9266e-04
Loss = 1.0453e-01, PNorm = 77.8877, GNorm = 0.5795, lr_0 = 3.9239e-04
Loss = 1.0645e-01, PNorm = 77.8961, GNorm = 0.6448, lr_0 = 3.9212e-04
Loss = 1.2069e-01, PNorm = 77.9032, GNorm = 0.5730, lr_0 = 3.9185e-04
Loss = 1.2434e-01, PNorm = 77.9144, GNorm = 0.6619, lr_0 = 3.9159e-04
Loss = 1.1398e-01, PNorm = 77.9226, GNorm = 0.6738, lr_0 = 3.9132e-04
Loss = 1.1221e-01, PNorm = 77.9362, GNorm = 0.6530, lr_0 = 3.9105e-04
Loss = 1.0942e-01, PNorm = 77.9506, GNorm = 0.7702, lr_0 = 3.9078e-04
Loss = 1.2357e-01, PNorm = 77.9568, GNorm = 0.7209, lr_0 = 3.9051e-04
Loss = 1.1439e-01, PNorm = 77.9665, GNorm = 0.8385, lr_0 = 3.9025e-04
Loss = 1.1274e-01, PNorm = 77.9782, GNorm = 0.7800, lr_0 = 3.8998e-04
Loss = 1.2914e-01, PNorm = 77.9869, GNorm = 0.9899, lr_0 = 3.8971e-04
Loss = 1.2971e-01, PNorm = 78.0010, GNorm = 0.7896, lr_0 = 3.8945e-04
Loss = 1.2225e-01, PNorm = 78.0079, GNorm = 0.5534, lr_0 = 3.8918e-04
Loss = 1.1709e-01, PNorm = 78.0202, GNorm = 0.5855, lr_0 = 3.8891e-04
Loss = 1.2041e-01, PNorm = 78.0325, GNorm = 0.4956, lr_0 = 3.8865e-04
Loss = 1.1072e-01, PNorm = 78.0438, GNorm = 0.9651, lr_0 = 3.8838e-04
Loss = 1.1138e-01, PNorm = 78.0548, GNorm = 0.4789, lr_0 = 3.8811e-04
Loss = 1.0553e-01, PNorm = 78.0638, GNorm = 0.5968, lr_0 = 3.8785e-04
Loss = 1.1446e-01, PNorm = 78.0713, GNorm = 0.6997, lr_0 = 3.8758e-04
Loss = 1.2084e-01, PNorm = 78.0787, GNorm = 1.2060, lr_0 = 3.8732e-04
Loss = 1.2717e-01, PNorm = 78.0868, GNorm = 0.7906, lr_0 = 3.8705e-04
Loss = 1.2956e-01, PNorm = 78.0969, GNorm = 0.6791, lr_0 = 3.8679e-04
Loss = 1.1555e-01, PNorm = 78.1080, GNorm = 0.9308, lr_0 = 3.8652e-04
Loss = 1.0596e-01, PNorm = 78.1225, GNorm = 0.8686, lr_0 = 3.8626e-04
Loss = 1.0532e-01, PNorm = 78.1338, GNorm = 0.5987, lr_0 = 3.8599e-04
Loss = 1.2579e-01, PNorm = 78.1387, GNorm = 0.6415, lr_0 = 3.8573e-04
Loss = 1.2420e-01, PNorm = 78.1438, GNorm = 0.6160, lr_0 = 3.8546e-04
Loss = 1.0717e-01, PNorm = 78.1580, GNorm = 0.7663, lr_0 = 3.8520e-04
Loss = 1.1371e-01, PNorm = 78.1659, GNorm = 0.9167, lr_0 = 3.8493e-04
Loss = 1.0565e-01, PNorm = 78.1739, GNorm = 0.8570, lr_0 = 3.8467e-04
Loss = 1.0458e-01, PNorm = 78.1840, GNorm = 0.7203, lr_0 = 3.8441e-04
Loss = 1.2401e-01, PNorm = 78.1925, GNorm = 0.5307, lr_0 = 3.8414e-04
Loss = 1.0630e-01, PNorm = 78.2006, GNorm = 0.4177, lr_0 = 3.8388e-04
Loss = 1.0593e-01, PNorm = 78.2066, GNorm = 0.6657, lr_0 = 3.8362e-04
Loss = 1.2881e-01, PNorm = 78.2175, GNorm = 0.6126, lr_0 = 3.8336e-04
Loss = 1.1590e-01, PNorm = 78.2231, GNorm = 1.0323, lr_0 = 3.8309e-04
Loss = 1.0695e-01, PNorm = 78.2275, GNorm = 0.5040, lr_0 = 3.8283e-04
Loss = 1.1721e-01, PNorm = 78.2428, GNorm = 1.1118, lr_0 = 3.8257e-04
Loss = 1.2863e-01, PNorm = 78.2532, GNorm = 0.8142, lr_0 = 3.8231e-04
Loss = 1.2484e-01, PNorm = 78.2642, GNorm = 0.8192, lr_0 = 3.8204e-04
Loss = 1.1954e-01, PNorm = 78.2786, GNorm = 0.7517, lr_0 = 3.8178e-04
Loss = 1.2875e-01, PNorm = 78.2917, GNorm = 0.6980, lr_0 = 3.8152e-04
Loss = 1.1393e-01, PNorm = 78.2985, GNorm = 0.5988, lr_0 = 3.8126e-04
Loss = 1.1276e-01, PNorm = 78.3049, GNorm = 0.5562, lr_0 = 3.8100e-04
Loss = 1.1623e-01, PNorm = 78.3134, GNorm = 0.6946, lr_0 = 3.8074e-04
Loss = 1.1782e-01, PNorm = 78.3246, GNorm = 0.8150, lr_0 = 3.8048e-04
Loss = 1.1210e-01, PNorm = 78.3349, GNorm = 0.8788, lr_0 = 3.8022e-04
Loss = 1.0759e-01, PNorm = 78.3381, GNorm = 0.4624, lr_0 = 3.7995e-04
Loss = 1.2505e-01, PNorm = 78.3434, GNorm = 0.9623, lr_0 = 3.7969e-04
Loss = 1.2305e-01, PNorm = 78.3501, GNorm = 0.6322, lr_0 = 3.7943e-04
Loss = 9.2657e-02, PNorm = 78.3617, GNorm = 0.5822, lr_0 = 3.7917e-04
Loss = 1.3037e-01, PNorm = 78.3712, GNorm = 0.5931, lr_0 = 3.7891e-04
Loss = 1.2532e-01, PNorm = 78.3800, GNorm = 0.5097, lr_0 = 3.7866e-04
Loss = 1.2978e-01, PNorm = 78.3896, GNorm = 0.8699, lr_0 = 3.7840e-04
Loss = 1.1750e-01, PNorm = 78.3980, GNorm = 0.6636, lr_0 = 3.7814e-04
Loss = 1.1409e-01, PNorm = 78.4046, GNorm = 0.7872, lr_0 = 3.7788e-04
Loss = 1.1143e-01, PNorm = 78.4125, GNorm = 0.7568, lr_0 = 3.7762e-04
Loss = 1.0726e-01, PNorm = 78.4240, GNorm = 0.5368, lr_0 = 3.7736e-04
Loss = 1.1924e-01, PNorm = 78.4296, GNorm = 0.7453, lr_0 = 3.7710e-04
Loss = 1.2045e-01, PNorm = 78.4400, GNorm = 0.6480, lr_0 = 3.7684e-04
Loss = 1.0659e-01, PNorm = 78.4516, GNorm = 0.7609, lr_0 = 3.7659e-04
Loss = 1.2448e-01, PNorm = 78.4626, GNorm = 0.6222, lr_0 = 3.7633e-04
Loss = 1.2874e-01, PNorm = 78.4704, GNorm = 0.6346, lr_0 = 3.7607e-04
Loss = 1.2813e-01, PNorm = 78.4833, GNorm = 0.7615, lr_0 = 3.7581e-04
Loss = 1.1394e-01, PNorm = 78.4945, GNorm = 0.5078, lr_0 = 3.7555e-04
Loss = 1.2410e-01, PNorm = 78.5053, GNorm = 0.9272, lr_0 = 3.7530e-04
Loss = 1.1876e-01, PNorm = 78.5119, GNorm = 0.6479, lr_0 = 3.7504e-04
Loss = 1.1432e-01, PNorm = 78.5210, GNorm = 0.6576, lr_0 = 3.7478e-04
Loss = 1.1167e-01, PNorm = 78.5309, GNorm = 0.7909, lr_0 = 3.7453e-04
Loss = 1.0517e-01, PNorm = 78.5377, GNorm = 0.5284, lr_0 = 3.7427e-04
Loss = 1.1443e-01, PNorm = 78.5485, GNorm = 0.7049, lr_0 = 3.7401e-04
Loss = 1.3382e-01, PNorm = 78.5577, GNorm = 0.7692, lr_0 = 3.7376e-04
Loss = 1.1207e-01, PNorm = 78.5662, GNorm = 0.5731, lr_0 = 3.7350e-04
Loss = 1.1803e-01, PNorm = 78.5782, GNorm = 0.7029, lr_0 = 3.7325e-04
Loss = 1.1445e-01, PNorm = 78.5911, GNorm = 0.7233, lr_0 = 3.7299e-04
Loss = 1.1400e-01, PNorm = 78.5951, GNorm = 0.6954, lr_0 = 3.7273e-04
Validation mae = 0.234251
Epoch 14
Loss = 1.0692e-01, PNorm = 78.6049, GNorm = 0.8939, lr_0 = 3.7248e-04
Loss = 1.1778e-01, PNorm = 78.6089, GNorm = 0.7267, lr_0 = 3.7222e-04
Loss = 1.1376e-01, PNorm = 78.6166, GNorm = 0.8125, lr_0 = 3.7197e-04
Loss = 9.4412e-02, PNorm = 78.6212, GNorm = 0.5103, lr_0 = 3.7171e-04
Loss = 1.0684e-01, PNorm = 78.6300, GNorm = 0.7017, lr_0 = 3.7146e-04
Loss = 1.0783e-01, PNorm = 78.6422, GNorm = 0.5577, lr_0 = 3.7120e-04
Loss = 1.1466e-01, PNorm = 78.6526, GNorm = 0.7241, lr_0 = 3.7095e-04
Loss = 1.0102e-01, PNorm = 78.6629, GNorm = 0.7535, lr_0 = 3.7070e-04
Loss = 1.0680e-01, PNorm = 78.6769, GNorm = 1.0313, lr_0 = 3.7044e-04
Loss = 1.1174e-01, PNorm = 78.6848, GNorm = 0.9564, lr_0 = 3.7019e-04
Loss = 1.0116e-01, PNorm = 78.6951, GNorm = 0.8826, lr_0 = 3.6993e-04
Loss = 1.0722e-01, PNorm = 78.7080, GNorm = 0.5644, lr_0 = 3.6968e-04
Loss = 1.1038e-01, PNorm = 78.7142, GNorm = 0.5401, lr_0 = 3.6943e-04
Loss = 1.0460e-01, PNorm = 78.7186, GNorm = 0.5178, lr_0 = 3.6917e-04
Loss = 1.1205e-01, PNorm = 78.7276, GNorm = 1.0931, lr_0 = 3.6892e-04
Loss = 1.0853e-01, PNorm = 78.7350, GNorm = 0.6750, lr_0 = 3.6867e-04
Loss = 1.1757e-01, PNorm = 78.7449, GNorm = 0.7742, lr_0 = 3.6842e-04
Loss = 1.0030e-01, PNorm = 78.7566, GNorm = 0.8133, lr_0 = 3.6816e-04
Loss = 1.0191e-01, PNorm = 78.7648, GNorm = 0.6385, lr_0 = 3.6791e-04
Loss = 1.0180e-01, PNorm = 78.7762, GNorm = 1.0234, lr_0 = 3.6766e-04
Loss = 1.1773e-01, PNorm = 78.7850, GNorm = 0.5906, lr_0 = 3.6741e-04
Loss = 1.0261e-01, PNorm = 78.7946, GNorm = 0.7798, lr_0 = 3.6716e-04
Loss = 1.0736e-01, PNorm = 78.8017, GNorm = 1.0000, lr_0 = 3.6690e-04
Loss = 1.1781e-01, PNorm = 78.8065, GNorm = 0.8225, lr_0 = 3.6665e-04
Loss = 1.1162e-01, PNorm = 78.8141, GNorm = 0.8424, lr_0 = 3.6640e-04
Loss = 1.0952e-01, PNorm = 78.8258, GNorm = 0.5936, lr_0 = 3.6615e-04
Loss = 1.1022e-01, PNorm = 78.8412, GNorm = 0.5982, lr_0 = 3.6590e-04
Loss = 1.1371e-01, PNorm = 78.8521, GNorm = 0.8465, lr_0 = 3.6565e-04
Loss = 1.0302e-01, PNorm = 78.8603, GNorm = 0.6773, lr_0 = 3.6540e-04
Loss = 1.2820e-01, PNorm = 78.8705, GNorm = 0.7535, lr_0 = 3.6515e-04
Loss = 1.0858e-01, PNorm = 78.8767, GNorm = 0.7134, lr_0 = 3.6490e-04
Loss = 1.0377e-01, PNorm = 78.8878, GNorm = 0.5288, lr_0 = 3.6465e-04
Loss = 1.1664e-01, PNorm = 78.8971, GNorm = 0.5513, lr_0 = 3.6440e-04
Loss = 1.2991e-01, PNorm = 78.9086, GNorm = 1.6105, lr_0 = 3.6415e-04
Loss = 1.3146e-01, PNorm = 78.9197, GNorm = 0.6148, lr_0 = 3.6390e-04
Loss = 1.0702e-01, PNorm = 78.9283, GNorm = 0.6962, lr_0 = 3.6365e-04
Loss = 1.1450e-01, PNorm = 78.9389, GNorm = 0.7660, lr_0 = 3.6340e-04
Loss = 1.1600e-01, PNorm = 78.9513, GNorm = 0.5795, lr_0 = 3.6315e-04
Loss = 1.2850e-01, PNorm = 78.9635, GNorm = 0.9336, lr_0 = 3.6290e-04
Loss = 1.0699e-01, PNorm = 78.9743, GNorm = 0.7667, lr_0 = 3.6266e-04
Loss = 1.1609e-01, PNorm = 78.9834, GNorm = 0.6874, lr_0 = 3.6241e-04
Loss = 9.9478e-02, PNorm = 78.9909, GNorm = 0.6150, lr_0 = 3.6216e-04
Loss = 1.0928e-01, PNorm = 78.9981, GNorm = 0.8263, lr_0 = 3.6191e-04
Loss = 1.0674e-01, PNorm = 79.0067, GNorm = 0.5510, lr_0 = 3.6166e-04
Loss = 1.1537e-01, PNorm = 79.0123, GNorm = 0.7754, lr_0 = 3.6141e-04
Loss = 1.2507e-01, PNorm = 79.0179, GNorm = 0.8087, lr_0 = 3.6117e-04
Loss = 1.0830e-01, PNorm = 79.0244, GNorm = 0.6892, lr_0 = 3.6092e-04
Loss = 9.1159e-02, PNorm = 79.0346, GNorm = 0.4256, lr_0 = 3.6067e-04
Loss = 1.1603e-01, PNorm = 79.0443, GNorm = 0.6445, lr_0 = 3.6043e-04
Loss = 1.1526e-01, PNorm = 79.0564, GNorm = 0.7634, lr_0 = 3.6018e-04
Loss = 1.2365e-01, PNorm = 79.0624, GNorm = 0.7406, lr_0 = 3.5993e-04
Loss = 1.1665e-01, PNorm = 79.0711, GNorm = 0.9657, lr_0 = 3.5969e-04
Loss = 1.3062e-01, PNorm = 79.0819, GNorm = 0.6549, lr_0 = 3.5944e-04
Loss = 1.1747e-01, PNorm = 79.0917, GNorm = 0.5472, lr_0 = 3.5919e-04
Loss = 1.1517e-01, PNorm = 79.0999, GNorm = 0.6074, lr_0 = 3.5895e-04
Loss = 1.0330e-01, PNorm = 79.1089, GNorm = 0.6668, lr_0 = 3.5870e-04
Loss = 1.1704e-01, PNorm = 79.1183, GNorm = 0.8770, lr_0 = 3.5845e-04
Loss = 1.1109e-01, PNorm = 79.1239, GNorm = 0.7613, lr_0 = 3.5821e-04
Loss = 1.2161e-01, PNorm = 79.1308, GNorm = 0.5498, lr_0 = 3.5796e-04
Loss = 1.0338e-01, PNorm = 79.1371, GNorm = 0.6551, lr_0 = 3.5772e-04
Loss = 1.0190e-01, PNorm = 79.1428, GNorm = 0.5421, lr_0 = 3.5747e-04
Loss = 1.0848e-01, PNorm = 79.1466, GNorm = 0.6356, lr_0 = 3.5723e-04
Loss = 1.2283e-01, PNorm = 79.1517, GNorm = 0.8985, lr_0 = 3.5698e-04
Loss = 1.2133e-01, PNorm = 79.1629, GNorm = 0.7163, lr_0 = 3.5674e-04
Loss = 1.1885e-01, PNorm = 79.1775, GNorm = 0.5960, lr_0 = 3.5650e-04
Loss = 1.1963e-01, PNorm = 79.1902, GNorm = 0.6158, lr_0 = 3.5625e-04
Loss = 1.0766e-01, PNorm = 79.1965, GNorm = 0.8904, lr_0 = 3.5601e-04
Loss = 1.1711e-01, PNorm = 79.2035, GNorm = 0.7224, lr_0 = 3.5576e-04
Loss = 1.1101e-01, PNorm = 79.2138, GNorm = 0.5840, lr_0 = 3.5552e-04
Loss = 1.0693e-01, PNorm = 79.2245, GNorm = 0.9785, lr_0 = 3.5528e-04
Loss = 1.0546e-01, PNorm = 79.2334, GNorm = 0.9087, lr_0 = 3.5503e-04
Loss = 1.2122e-01, PNorm = 79.2400, GNorm = 0.6354, lr_0 = 3.5479e-04
Loss = 1.0998e-01, PNorm = 79.2490, GNorm = 0.5735, lr_0 = 3.5455e-04
Loss = 1.0335e-01, PNorm = 79.2521, GNorm = 0.5216, lr_0 = 3.5430e-04
Loss = 9.9095e-02, PNorm = 79.2595, GNorm = 0.6637, lr_0 = 3.5406e-04
Loss = 1.0810e-01, PNorm = 79.2659, GNorm = 0.6001, lr_0 = 3.5382e-04
Loss = 1.1211e-01, PNorm = 79.2724, GNorm = 0.7213, lr_0 = 3.5358e-04
Loss = 1.1196e-01, PNorm = 79.2767, GNorm = 0.7596, lr_0 = 3.5333e-04
Loss = 1.1071e-01, PNorm = 79.2833, GNorm = 0.4242, lr_0 = 3.5309e-04
Loss = 1.1696e-01, PNorm = 79.2877, GNorm = 0.7644, lr_0 = 3.5285e-04
Loss = 1.2111e-01, PNorm = 79.2947, GNorm = 1.0207, lr_0 = 3.5261e-04
Loss = 1.1239e-01, PNorm = 79.3037, GNorm = 0.6619, lr_0 = 3.5237e-04
Loss = 1.1920e-01, PNorm = 79.3110, GNorm = 0.5376, lr_0 = 3.5212e-04
Loss = 1.0892e-01, PNorm = 79.3180, GNorm = 0.8980, lr_0 = 3.5188e-04
Loss = 1.0938e-01, PNorm = 79.3243, GNorm = 1.1184, lr_0 = 3.5164e-04
Loss = 1.2331e-01, PNorm = 79.3345, GNorm = 0.7593, lr_0 = 3.5140e-04
Loss = 1.0730e-01, PNorm = 79.3453, GNorm = 0.7143, lr_0 = 3.5116e-04
Loss = 1.1023e-01, PNorm = 79.3514, GNorm = 0.5168, lr_0 = 3.5092e-04
Loss = 1.0774e-01, PNorm = 79.3615, GNorm = 0.5957, lr_0 = 3.5068e-04
Loss = 1.3549e-01, PNorm = 79.3704, GNorm = 0.7655, lr_0 = 3.5044e-04
Loss = 1.0067e-01, PNorm = 79.3800, GNorm = 0.6826, lr_0 = 3.5020e-04
Loss = 1.1547e-01, PNorm = 79.3887, GNorm = 0.7850, lr_0 = 3.4996e-04
Loss = 1.1174e-01, PNorm = 79.3937, GNorm = 0.4669, lr_0 = 3.4972e-04
Loss = 1.1505e-01, PNorm = 79.3974, GNorm = 0.7369, lr_0 = 3.4948e-04
Loss = 1.1548e-01, PNorm = 79.4088, GNorm = 0.5534, lr_0 = 3.4924e-04
Loss = 1.2413e-01, PNorm = 79.4123, GNorm = 0.7467, lr_0 = 3.4900e-04
Loss = 1.2328e-01, PNorm = 79.4214, GNorm = 0.6982, lr_0 = 3.4876e-04
Loss = 1.1407e-01, PNorm = 79.4278, GNorm = 0.7996, lr_0 = 3.4852e-04
Loss = 1.2419e-01, PNorm = 79.4366, GNorm = 0.5340, lr_0 = 3.4828e-04
Loss = 1.0750e-01, PNorm = 79.4457, GNorm = 1.0740, lr_0 = 3.4805e-04
Loss = 1.2391e-01, PNorm = 79.4558, GNorm = 0.5882, lr_0 = 3.4781e-04
Loss = 1.0855e-01, PNorm = 79.4670, GNorm = 0.6630, lr_0 = 3.4757e-04
Loss = 1.0809e-01, PNorm = 79.4764, GNorm = 0.7743, lr_0 = 3.4733e-04
Loss = 1.0720e-01, PNorm = 79.4808, GNorm = 0.6746, lr_0 = 3.4709e-04
Loss = 1.0668e-01, PNorm = 79.4866, GNorm = 0.6749, lr_0 = 3.4686e-04
Loss = 1.0429e-01, PNorm = 79.4935, GNorm = 0.7993, lr_0 = 3.4662e-04
Loss = 1.2996e-01, PNorm = 79.5005, GNorm = 0.6821, lr_0 = 3.4638e-04
Loss = 1.1840e-01, PNorm = 79.5061, GNorm = 0.6236, lr_0 = 3.4614e-04
Loss = 1.1204e-01, PNorm = 79.5135, GNorm = 0.9980, lr_0 = 3.4591e-04
Loss = 1.2101e-01, PNorm = 79.5244, GNorm = 0.5521, lr_0 = 3.4567e-04
Loss = 1.0800e-01, PNorm = 79.5297, GNorm = 0.7997, lr_0 = 3.4543e-04
Loss = 1.1616e-01, PNorm = 79.5363, GNorm = 0.6965, lr_0 = 3.4520e-04
Loss = 1.2278e-01, PNorm = 79.5435, GNorm = 0.8863, lr_0 = 3.4496e-04
Loss = 1.1598e-01, PNorm = 79.5488, GNorm = 0.7593, lr_0 = 3.4472e-04
Loss = 1.2949e-01, PNorm = 79.5530, GNorm = 0.7476, lr_0 = 3.4449e-04
Loss = 1.1721e-01, PNorm = 79.5604, GNorm = 1.1386, lr_0 = 3.4425e-04
Loss = 1.0763e-01, PNorm = 79.5676, GNorm = 0.6658, lr_0 = 3.4402e-04
Loss = 1.0354e-01, PNorm = 79.5749, GNorm = 0.5935, lr_0 = 3.4378e-04
Loss = 1.1427e-01, PNorm = 79.5842, GNorm = 0.6759, lr_0 = 3.4354e-04
Loss = 1.1386e-01, PNorm = 79.5912, GNorm = 0.5557, lr_0 = 3.4331e-04
Validation mae = 0.231246
Epoch 15
Loss = 1.1929e-01, PNorm = 79.6023, GNorm = 0.6420, lr_0 = 3.4307e-04
Loss = 1.0726e-01, PNorm = 79.6158, GNorm = 0.8176, lr_0 = 3.4284e-04
Loss = 1.0603e-01, PNorm = 79.6270, GNorm = 0.4803, lr_0 = 3.4260e-04
Loss = 1.1338e-01, PNorm = 79.6342, GNorm = 0.5415, lr_0 = 3.4237e-04
Loss = 9.9414e-02, PNorm = 79.6436, GNorm = 0.5330, lr_0 = 3.4213e-04
Loss = 8.3489e-02, PNorm = 79.6503, GNorm = 0.5058, lr_0 = 3.4190e-04
Loss = 9.2814e-02, PNorm = 79.6589, GNorm = 0.6419, lr_0 = 3.4167e-04
Loss = 9.5701e-02, PNorm = 79.6672, GNorm = 0.7807, lr_0 = 3.4143e-04
Loss = 1.1579e-01, PNorm = 79.6736, GNorm = 0.5723, lr_0 = 3.4120e-04
Loss = 9.8239e-02, PNorm = 79.6783, GNorm = 0.7679, lr_0 = 3.4096e-04
Loss = 1.1716e-01, PNorm = 79.6863, GNorm = 0.5506, lr_0 = 3.4073e-04
Loss = 1.2384e-01, PNorm = 79.6944, GNorm = 0.6468, lr_0 = 3.4050e-04
Loss = 8.8274e-02, PNorm = 79.7006, GNorm = 0.6087, lr_0 = 3.4026e-04
Loss = 1.1033e-01, PNorm = 79.7128, GNorm = 0.5224, lr_0 = 3.4003e-04
Loss = 1.1597e-01, PNorm = 79.7222, GNorm = 0.7179, lr_0 = 3.3980e-04
Loss = 1.1737e-01, PNorm = 79.7304, GNorm = 0.6133, lr_0 = 3.3956e-04
Loss = 9.7755e-02, PNorm = 79.7381, GNorm = 0.5047, lr_0 = 3.3933e-04
Loss = 1.0836e-01, PNorm = 79.7479, GNorm = 0.5497, lr_0 = 3.3910e-04
Loss = 1.0987e-01, PNorm = 79.7555, GNorm = 0.5061, lr_0 = 3.3887e-04
Loss = 1.0417e-01, PNorm = 79.7634, GNorm = 0.9053, lr_0 = 3.3864e-04
Loss = 9.6443e-02, PNorm = 79.7723, GNorm = 0.8656, lr_0 = 3.3840e-04
Loss = 1.2935e-01, PNorm = 79.7810, GNorm = 0.7294, lr_0 = 3.3817e-04
Loss = 1.1037e-01, PNorm = 79.7929, GNorm = 0.9045, lr_0 = 3.3794e-04
Loss = 1.0287e-01, PNorm = 79.8028, GNorm = 0.6038, lr_0 = 3.3771e-04
Loss = 1.1731e-01, PNorm = 79.8153, GNorm = 0.6910, lr_0 = 3.3748e-04
Loss = 9.7578e-02, PNorm = 79.8279, GNorm = 0.9795, lr_0 = 3.3725e-04
Loss = 9.8801e-02, PNorm = 79.8297, GNorm = 0.4431, lr_0 = 3.3701e-04
Loss = 9.7353e-02, PNorm = 79.8351, GNorm = 0.5896, lr_0 = 3.3678e-04
Loss = 1.0956e-01, PNorm = 79.8404, GNorm = 0.9494, lr_0 = 3.3655e-04
Loss = 1.0371e-01, PNorm = 79.8510, GNorm = 0.4959, lr_0 = 3.3632e-04
Loss = 1.0764e-01, PNorm = 79.8610, GNorm = 0.4767, lr_0 = 3.3609e-04
Loss = 1.0190e-01, PNorm = 79.8685, GNorm = 0.5989, lr_0 = 3.3586e-04
Loss = 1.1192e-01, PNorm = 79.8775, GNorm = 1.0778, lr_0 = 3.3563e-04
Loss = 1.0954e-01, PNorm = 79.8852, GNorm = 1.2155, lr_0 = 3.3540e-04
Loss = 1.2392e-01, PNorm = 79.8905, GNorm = 0.8122, lr_0 = 3.3517e-04
Loss = 1.0735e-01, PNorm = 79.8977, GNorm = 0.7030, lr_0 = 3.3494e-04
Loss = 1.2533e-01, PNorm = 79.9069, GNorm = 0.7121, lr_0 = 3.3471e-04
Loss = 1.0329e-01, PNorm = 79.9163, GNorm = 0.6095, lr_0 = 3.3448e-04
Loss = 1.1339e-01, PNorm = 79.9257, GNorm = 0.6866, lr_0 = 3.3425e-04
Loss = 1.1485e-01, PNorm = 79.9353, GNorm = 0.6921, lr_0 = 3.3403e-04
Loss = 1.2461e-01, PNorm = 79.9434, GNorm = 0.8766, lr_0 = 3.3380e-04
Loss = 1.0419e-01, PNorm = 79.9500, GNorm = 0.9678, lr_0 = 3.3357e-04
Loss = 1.0236e-01, PNorm = 79.9582, GNorm = 0.7014, lr_0 = 3.3334e-04
Loss = 9.6994e-02, PNorm = 79.9654, GNorm = 0.5038, lr_0 = 3.3311e-04
Loss = 1.0408e-01, PNorm = 79.9703, GNorm = 0.6870, lr_0 = 3.3288e-04
Loss = 9.7024e-02, PNorm = 79.9774, GNorm = 0.7700, lr_0 = 3.3265e-04
Loss = 1.1014e-01, PNorm = 79.9838, GNorm = 0.9033, lr_0 = 3.3243e-04
Loss = 1.0957e-01, PNorm = 79.9874, GNorm = 0.5299, lr_0 = 3.3220e-04
Loss = 1.0461e-01, PNorm = 79.9935, GNorm = 0.9118, lr_0 = 3.3197e-04
Loss = 1.1329e-01, PNorm = 79.9985, GNorm = 0.6331, lr_0 = 3.3174e-04
Loss = 1.1697e-01, PNorm = 80.0061, GNorm = 0.7533, lr_0 = 3.3152e-04
Loss = 1.1756e-01, PNorm = 80.0167, GNorm = 0.9151, lr_0 = 3.3129e-04
Loss = 1.1722e-01, PNorm = 80.0248, GNorm = 0.6482, lr_0 = 3.3106e-04
Loss = 1.0513e-01, PNorm = 80.0298, GNorm = 0.5815, lr_0 = 3.3084e-04
Loss = 1.1823e-01, PNorm = 80.0394, GNorm = 0.6750, lr_0 = 3.3061e-04
Loss = 1.0153e-01, PNorm = 80.0467, GNorm = 0.5320, lr_0 = 3.3038e-04
Loss = 1.1643e-01, PNorm = 80.0532, GNorm = 0.7287, lr_0 = 3.3016e-04
Loss = 1.0633e-01, PNorm = 80.0608, GNorm = 0.6923, lr_0 = 3.2993e-04
Loss = 9.2650e-02, PNorm = 80.0658, GNorm = 0.5061, lr_0 = 3.2970e-04
Loss = 1.1228e-01, PNorm = 80.0711, GNorm = 0.6409, lr_0 = 3.2948e-04
Loss = 1.0582e-01, PNorm = 80.0779, GNorm = 0.6618, lr_0 = 3.2925e-04
Loss = 1.2118e-01, PNorm = 80.0864, GNorm = 0.6750, lr_0 = 3.2903e-04
Loss = 1.1674e-01, PNorm = 80.0950, GNorm = 0.5991, lr_0 = 3.2880e-04
Loss = 1.1186e-01, PNorm = 80.1060, GNorm = 0.7883, lr_0 = 3.2858e-04
Loss = 9.8092e-02, PNorm = 80.1156, GNorm = 0.6458, lr_0 = 3.2835e-04
Loss = 1.1949e-01, PNorm = 80.1178, GNorm = 0.7031, lr_0 = 3.2813e-04
Loss = 1.0336e-01, PNorm = 80.1238, GNorm = 0.6232, lr_0 = 3.2790e-04
Loss = 1.1210e-01, PNorm = 80.1343, GNorm = 0.9910, lr_0 = 3.2768e-04
Loss = 1.0176e-01, PNorm = 80.1418, GNorm = 0.6195, lr_0 = 3.2745e-04
Loss = 9.6769e-02, PNorm = 80.1494, GNorm = 0.4748, lr_0 = 3.2723e-04
Loss = 1.1127e-01, PNorm = 80.1509, GNorm = 0.5775, lr_0 = 3.2700e-04
Loss = 1.2845e-01, PNorm = 80.1594, GNorm = 0.9838, lr_0 = 3.2678e-04
Loss = 1.1213e-01, PNorm = 80.1694, GNorm = 0.6114, lr_0 = 3.2656e-04
Loss = 1.2477e-01, PNorm = 80.1765, GNorm = 0.5940, lr_0 = 3.2633e-04
Loss = 1.0905e-01, PNorm = 80.1824, GNorm = 1.4868, lr_0 = 3.2611e-04
Loss = 1.0728e-01, PNorm = 80.1923, GNorm = 0.5186, lr_0 = 3.2589e-04
Loss = 1.0281e-01, PNorm = 80.2019, GNorm = 0.7254, lr_0 = 3.2566e-04
Loss = 1.1413e-01, PNorm = 80.2092, GNorm = 0.9224, lr_0 = 3.2544e-04
Loss = 1.1223e-01, PNorm = 80.2158, GNorm = 0.6671, lr_0 = 3.2522e-04
Loss = 1.0128e-01, PNorm = 80.2219, GNorm = 0.5635, lr_0 = 3.2499e-04
Loss = 1.0177e-01, PNorm = 80.2307, GNorm = 0.9180, lr_0 = 3.2477e-04
Loss = 1.1030e-01, PNorm = 80.2362, GNorm = 0.6101, lr_0 = 3.2455e-04
Loss = 1.0466e-01, PNorm = 80.2429, GNorm = 0.5762, lr_0 = 3.2433e-04
Loss = 1.1055e-01, PNorm = 80.2527, GNorm = 0.8066, lr_0 = 3.2410e-04
Loss = 1.0319e-01, PNorm = 80.2606, GNorm = 0.5050, lr_0 = 3.2388e-04
Loss = 1.0790e-01, PNorm = 80.2696, GNorm = 0.5864, lr_0 = 3.2366e-04
Loss = 1.0317e-01, PNorm = 80.2802, GNorm = 0.7509, lr_0 = 3.2344e-04
Loss = 1.0865e-01, PNorm = 80.2884, GNorm = 0.6975, lr_0 = 3.2322e-04
Loss = 1.0814e-01, PNorm = 80.2956, GNorm = 0.6899, lr_0 = 3.2300e-04
Loss = 1.0296e-01, PNorm = 80.3035, GNorm = 0.8388, lr_0 = 3.2277e-04
Loss = 1.1660e-01, PNorm = 80.3091, GNorm = 0.5667, lr_0 = 3.2255e-04
Loss = 1.0711e-01, PNorm = 80.3162, GNorm = 0.5986, lr_0 = 3.2233e-04
Loss = 1.0652e-01, PNorm = 80.3230, GNorm = 0.7340, lr_0 = 3.2211e-04
Loss = 1.0569e-01, PNorm = 80.3311, GNorm = 0.9770, lr_0 = 3.2189e-04
Loss = 1.1434e-01, PNorm = 80.3383, GNorm = 0.7753, lr_0 = 3.2167e-04
Loss = 1.1551e-01, PNorm = 80.3414, GNorm = 0.5887, lr_0 = 3.2145e-04
Loss = 9.9239e-02, PNorm = 80.3474, GNorm = 0.5898, lr_0 = 3.2123e-04
Loss = 1.1877e-01, PNorm = 80.3545, GNorm = 1.1680, lr_0 = 3.2101e-04
Loss = 1.1198e-01, PNorm = 80.3611, GNorm = 0.5595, lr_0 = 3.2079e-04
Loss = 9.9362e-02, PNorm = 80.3706, GNorm = 0.4711, lr_0 = 3.2057e-04
Loss = 1.0330e-01, PNorm = 80.3784, GNorm = 0.8129, lr_0 = 3.2035e-04
Loss = 1.0988e-01, PNorm = 80.3876, GNorm = 0.7552, lr_0 = 3.2013e-04
Loss = 1.1693e-01, PNorm = 80.3944, GNorm = 0.7821, lr_0 = 3.1991e-04
Loss = 1.1195e-01, PNorm = 80.3970, GNorm = 0.5833, lr_0 = 3.1969e-04
Loss = 1.0370e-01, PNorm = 80.4007, GNorm = 0.7424, lr_0 = 3.1947e-04
Loss = 1.0237e-01, PNorm = 80.4086, GNorm = 0.6620, lr_0 = 3.1925e-04
Loss = 1.0620e-01, PNorm = 80.4173, GNorm = 1.0270, lr_0 = 3.1904e-04
Loss = 1.1066e-01, PNorm = 80.4252, GNorm = 0.4904, lr_0 = 3.1882e-04
Loss = 1.0620e-01, PNorm = 80.4307, GNorm = 0.8456, lr_0 = 3.1860e-04
Loss = 1.2548e-01, PNorm = 80.4350, GNorm = 0.8442, lr_0 = 3.1838e-04
Loss = 1.1841e-01, PNorm = 80.4397, GNorm = 0.5435, lr_0 = 3.1816e-04
Loss = 1.3372e-01, PNorm = 80.4468, GNorm = 0.9121, lr_0 = 3.1794e-04
Loss = 8.9073e-02, PNorm = 80.4584, GNorm = 0.4866, lr_0 = 3.1773e-04
Loss = 9.8013e-02, PNorm = 80.4712, GNorm = 0.9044, lr_0 = 3.1751e-04
Loss = 1.0588e-01, PNorm = 80.4778, GNorm = 0.6844, lr_0 = 3.1729e-04
Loss = 9.5461e-02, PNorm = 80.4785, GNorm = 0.6545, lr_0 = 3.1707e-04
Loss = 9.5171e-02, PNorm = 80.4826, GNorm = 1.2657, lr_0 = 3.1686e-04
Loss = 1.0593e-01, PNorm = 80.4857, GNorm = 0.6498, lr_0 = 3.1664e-04
Loss = 1.1190e-01, PNorm = 80.4907, GNorm = 0.4655, lr_0 = 3.1642e-04
Loss = 1.0158e-01, PNorm = 80.4985, GNorm = 1.3917, lr_0 = 3.1621e-04
Validation mae = 0.229151
Epoch 16
Loss = 1.0754e-01, PNorm = 80.5065, GNorm = 0.5298, lr_0 = 3.1599e-04
Loss = 1.0385e-01, PNorm = 80.5168, GNorm = 0.6531, lr_0 = 3.1577e-04
Loss = 1.0316e-01, PNorm = 80.5246, GNorm = 0.9169, lr_0 = 3.1556e-04
Loss = 1.0070e-01, PNorm = 80.5284, GNorm = 0.6308, lr_0 = 3.1534e-04
Loss = 9.8702e-02, PNorm = 80.5362, GNorm = 0.9996, lr_0 = 3.1512e-04
Loss = 1.0700e-01, PNorm = 80.5404, GNorm = 0.6718, lr_0 = 3.1491e-04
Loss = 8.9421e-02, PNorm = 80.5472, GNorm = 0.6055, lr_0 = 3.1469e-04
Loss = 1.0239e-01, PNorm = 80.5563, GNorm = 0.5823, lr_0 = 3.1448e-04
Loss = 1.0399e-01, PNorm = 80.5643, GNorm = 0.9262, lr_0 = 3.1426e-04
Loss = 9.0623e-02, PNorm = 80.5718, GNorm = 0.7101, lr_0 = 3.1405e-04
Loss = 9.4283e-02, PNorm = 80.5804, GNorm = 0.6914, lr_0 = 3.1383e-04
Loss = 9.3363e-02, PNorm = 80.5892, GNorm = 0.6566, lr_0 = 3.1362e-04
Loss = 1.0767e-01, PNorm = 80.5940, GNorm = 0.4539, lr_0 = 3.1340e-04
Loss = 1.0783e-01, PNorm = 80.6037, GNorm = 0.6468, lr_0 = 3.1319e-04
Loss = 1.1279e-01, PNorm = 80.6084, GNorm = 0.6418, lr_0 = 3.1297e-04
Loss = 1.0090e-01, PNorm = 80.6143, GNorm = 0.6692, lr_0 = 3.1276e-04
Loss = 1.0102e-01, PNorm = 80.6221, GNorm = 0.8376, lr_0 = 3.1254e-04
Loss = 1.1421e-01, PNorm = 80.6276, GNorm = 1.0011, lr_0 = 3.1233e-04
Loss = 9.3026e-02, PNorm = 80.6318, GNorm = 0.5473, lr_0 = 3.1212e-04
Loss = 9.3702e-02, PNorm = 80.6397, GNorm = 0.5597, lr_0 = 3.1190e-04
Loss = 9.9904e-02, PNorm = 80.6478, GNorm = 0.5733, lr_0 = 3.1169e-04
Loss = 9.6226e-02, PNorm = 80.6544, GNorm = 0.7198, lr_0 = 3.1147e-04
Loss = 9.5743e-02, PNorm = 80.6589, GNorm = 0.5360, lr_0 = 3.1126e-04
Loss = 1.0624e-01, PNorm = 80.6680, GNorm = 1.0885, lr_0 = 3.1105e-04
Loss = 9.5692e-02, PNorm = 80.6767, GNorm = 0.5674, lr_0 = 3.1083e-04
Loss = 1.0173e-01, PNorm = 80.6826, GNorm = 0.4900, lr_0 = 3.1062e-04
Loss = 9.8635e-02, PNorm = 80.6917, GNorm = 0.6744, lr_0 = 3.1041e-04
Loss = 1.0040e-01, PNorm = 80.6973, GNorm = 0.5699, lr_0 = 3.1020e-04
Loss = 1.0545e-01, PNorm = 80.7026, GNorm = 0.6052, lr_0 = 3.0998e-04
Loss = 1.0753e-01, PNorm = 80.7059, GNorm = 0.6082, lr_0 = 3.0977e-04
Loss = 9.2656e-02, PNorm = 80.7079, GNorm = 0.7738, lr_0 = 3.0956e-04
Loss = 9.9120e-02, PNorm = 80.7147, GNorm = 0.6745, lr_0 = 3.0935e-04
Loss = 1.0901e-01, PNorm = 80.7206, GNorm = 0.5751, lr_0 = 3.0914e-04
Loss = 1.0634e-01, PNorm = 80.7281, GNorm = 0.8398, lr_0 = 3.0892e-04
Loss = 1.0448e-01, PNorm = 80.7332, GNorm = 0.7805, lr_0 = 3.0871e-04
Loss = 1.0903e-01, PNorm = 80.7425, GNorm = 0.4207, lr_0 = 3.0850e-04
Loss = 1.0848e-01, PNorm = 80.7494, GNorm = 0.5988, lr_0 = 3.0829e-04
Loss = 9.1521e-02, PNorm = 80.7579, GNorm = 0.4958, lr_0 = 3.0808e-04
Loss = 9.6735e-02, PNorm = 80.7661, GNorm = 0.5380, lr_0 = 3.0787e-04
Loss = 1.1211e-01, PNorm = 80.7726, GNorm = 0.9284, lr_0 = 3.0766e-04
Loss = 1.0839e-01, PNorm = 80.7788, GNorm = 0.7144, lr_0 = 3.0745e-04
Loss = 9.8738e-02, PNorm = 80.7869, GNorm = 0.9016, lr_0 = 3.0723e-04
Loss = 9.9538e-02, PNorm = 80.7905, GNorm = 0.6151, lr_0 = 3.0702e-04
Loss = 9.7817e-02, PNorm = 80.7977, GNorm = 0.5829, lr_0 = 3.0681e-04
Loss = 1.1258e-01, PNorm = 80.8015, GNorm = 0.4989, lr_0 = 3.0660e-04
Loss = 1.0737e-01, PNorm = 80.8064, GNorm = 0.7169, lr_0 = 3.0639e-04
Loss = 1.0124e-01, PNorm = 80.8130, GNorm = 0.5634, lr_0 = 3.0618e-04
Loss = 1.0005e-01, PNorm = 80.8250, GNorm = 0.8752, lr_0 = 3.0597e-04
Loss = 9.3265e-02, PNorm = 80.8329, GNorm = 0.5782, lr_0 = 3.0576e-04
Loss = 1.0830e-01, PNorm = 80.8401, GNorm = 0.7328, lr_0 = 3.0555e-04
Loss = 1.0711e-01, PNorm = 80.8481, GNorm = 0.6717, lr_0 = 3.0535e-04
Loss = 1.0426e-01, PNorm = 80.8507, GNorm = 0.6043, lr_0 = 3.0514e-04
Loss = 1.0092e-01, PNorm = 80.8573, GNorm = 0.9652, lr_0 = 3.0493e-04
Loss = 1.1474e-01, PNorm = 80.8660, GNorm = 0.6862, lr_0 = 3.0472e-04
Loss = 9.5713e-02, PNorm = 80.8754, GNorm = 0.5486, lr_0 = 3.0451e-04
Loss = 1.1014e-01, PNorm = 80.8850, GNorm = 0.6474, lr_0 = 3.0430e-04
Loss = 9.8816e-02, PNorm = 80.8928, GNorm = 0.6846, lr_0 = 3.0409e-04
Loss = 1.0025e-01, PNorm = 80.8983, GNorm = 0.8389, lr_0 = 3.0388e-04
Loss = 1.0816e-01, PNorm = 80.9009, GNorm = 0.6870, lr_0 = 3.0368e-04
Loss = 1.0556e-01, PNorm = 80.9062, GNorm = 0.9476, lr_0 = 3.0347e-04
Loss = 1.1154e-01, PNorm = 80.9079, GNorm = 0.5240, lr_0 = 3.0326e-04
Loss = 1.0519e-01, PNorm = 80.9142, GNorm = 0.7509, lr_0 = 3.0305e-04
Loss = 1.0209e-01, PNorm = 80.9208, GNorm = 0.8879, lr_0 = 3.0284e-04
Loss = 1.1802e-01, PNorm = 80.9307, GNorm = 0.6204, lr_0 = 3.0264e-04
Loss = 1.1974e-01, PNorm = 80.9398, GNorm = 0.5456, lr_0 = 3.0243e-04
Loss = 1.0500e-01, PNorm = 80.9492, GNorm = 0.5785, lr_0 = 3.0222e-04
Loss = 8.6948e-02, PNorm = 80.9556, GNorm = 0.6175, lr_0 = 3.0202e-04
Loss = 1.0437e-01, PNorm = 80.9618, GNorm = 0.6869, lr_0 = 3.0181e-04
Loss = 9.5050e-02, PNorm = 80.9683, GNorm = 0.5007, lr_0 = 3.0160e-04
Loss = 9.5119e-02, PNorm = 80.9736, GNorm = 0.6044, lr_0 = 3.0140e-04
Loss = 1.1676e-01, PNorm = 80.9790, GNorm = 0.5506, lr_0 = 3.0119e-04
Loss = 1.1109e-01, PNorm = 80.9842, GNorm = 0.7872, lr_0 = 3.0098e-04
Loss = 1.0010e-01, PNorm = 80.9894, GNorm = 1.0810, lr_0 = 3.0078e-04
Loss = 1.0907e-01, PNorm = 80.9945, GNorm = 1.0706, lr_0 = 3.0057e-04
Loss = 1.2121e-01, PNorm = 81.0006, GNorm = 0.5899, lr_0 = 3.0036e-04
Loss = 1.0938e-01, PNorm = 81.0077, GNorm = 1.0395, lr_0 = 3.0016e-04
Loss = 1.1462e-01, PNorm = 81.0160, GNorm = 1.1076, lr_0 = 2.9995e-04
Loss = 1.0893e-01, PNorm = 81.0195, GNorm = 0.5830, lr_0 = 2.9975e-04
Loss = 1.1623e-01, PNorm = 81.0237, GNorm = 0.6240, lr_0 = 2.9954e-04
Loss = 9.6532e-02, PNorm = 81.0267, GNorm = 0.7804, lr_0 = 2.9934e-04
Loss = 9.0392e-02, PNorm = 81.0299, GNorm = 0.6837, lr_0 = 2.9913e-04
Loss = 1.2032e-01, PNorm = 81.0377, GNorm = 0.7915, lr_0 = 2.9893e-04
Loss = 1.0787e-01, PNorm = 81.0457, GNorm = 0.6898, lr_0 = 2.9872e-04
Loss = 9.9363e-02, PNorm = 81.0531, GNorm = 0.6562, lr_0 = 2.9852e-04
Loss = 1.0409e-01, PNorm = 81.0642, GNorm = 0.4566, lr_0 = 2.9831e-04
Loss = 1.2015e-01, PNorm = 81.0715, GNorm = 0.7372, lr_0 = 2.9811e-04
Loss = 1.1086e-01, PNorm = 81.0774, GNorm = 1.2742, lr_0 = 2.9790e-04
Loss = 1.0209e-01, PNorm = 81.0827, GNorm = 0.9240, lr_0 = 2.9770e-04
Loss = 9.7758e-02, PNorm = 81.0908, GNorm = 0.7500, lr_0 = 2.9750e-04
Loss = 8.8732e-02, PNorm = 81.0975, GNorm = 0.7172, lr_0 = 2.9729e-04
Loss = 1.0939e-01, PNorm = 81.1000, GNorm = 0.9409, lr_0 = 2.9709e-04
Loss = 9.9039e-02, PNorm = 81.1087, GNorm = 0.7018, lr_0 = 2.9689e-04
Loss = 1.1415e-01, PNorm = 81.1136, GNorm = 0.6496, lr_0 = 2.9668e-04
Loss = 9.9528e-02, PNorm = 81.1216, GNorm = 0.6475, lr_0 = 2.9648e-04
Loss = 9.8992e-02, PNorm = 81.1299, GNorm = 0.9436, lr_0 = 2.9628e-04
Loss = 1.0015e-01, PNorm = 81.1347, GNorm = 0.6689, lr_0 = 2.9607e-04
Loss = 1.1031e-01, PNorm = 81.1422, GNorm = 0.6174, lr_0 = 2.9587e-04
Loss = 1.1014e-01, PNorm = 81.1462, GNorm = 0.6155, lr_0 = 2.9567e-04
Loss = 1.1715e-01, PNorm = 81.1528, GNorm = 0.7495, lr_0 = 2.9546e-04
Loss = 1.0087e-01, PNorm = 81.1601, GNorm = 0.8084, lr_0 = 2.9526e-04
Loss = 1.0970e-01, PNorm = 81.1633, GNorm = 0.6978, lr_0 = 2.9506e-04
Loss = 1.0366e-01, PNorm = 81.1719, GNorm = 0.6473, lr_0 = 2.9486e-04
Loss = 1.2146e-01, PNorm = 81.1767, GNorm = 0.8911, lr_0 = 2.9466e-04
Loss = 1.0070e-01, PNorm = 81.1838, GNorm = 0.5485, lr_0 = 2.9445e-04
Loss = 8.9037e-02, PNorm = 81.1932, GNorm = 0.5162, lr_0 = 2.9425e-04
Loss = 9.7431e-02, PNorm = 81.1950, GNorm = 0.5554, lr_0 = 2.9405e-04
Loss = 1.0643e-01, PNorm = 81.1958, GNorm = 0.5740, lr_0 = 2.9385e-04
Loss = 1.0340e-01, PNorm = 81.2037, GNorm = 0.6454, lr_0 = 2.9365e-04
Loss = 1.1498e-01, PNorm = 81.2124, GNorm = 0.8507, lr_0 = 2.9345e-04
Loss = 1.0420e-01, PNorm = 81.2203, GNorm = 1.0197, lr_0 = 2.9325e-04
Loss = 1.0909e-01, PNorm = 81.2295, GNorm = 0.6085, lr_0 = 2.9305e-04
Loss = 1.1338e-01, PNorm = 81.2381, GNorm = 0.6649, lr_0 = 2.9284e-04
Loss = 1.0919e-01, PNorm = 81.2460, GNorm = 0.6086, lr_0 = 2.9264e-04
Loss = 1.1395e-01, PNorm = 81.2498, GNorm = 1.3485, lr_0 = 2.9244e-04
Loss = 1.1641e-01, PNorm = 81.2524, GNorm = 0.8998, lr_0 = 2.9224e-04
Loss = 9.9949e-02, PNorm = 81.2614, GNorm = 0.5697, lr_0 = 2.9204e-04
Loss = 1.0131e-01, PNorm = 81.2669, GNorm = 0.7184, lr_0 = 2.9184e-04
Loss = 1.0408e-01, PNorm = 81.2731, GNorm = 0.7051, lr_0 = 2.9164e-04
Loss = 1.0791e-01, PNorm = 81.2779, GNorm = 0.6336, lr_0 = 2.9144e-04
Loss = 1.0618e-01, PNorm = 81.2824, GNorm = 0.6894, lr_0 = 2.9124e-04
Validation mae = 0.229000
Epoch 17
Loss = 1.0234e-01, PNorm = 81.2883, GNorm = 0.7251, lr_0 = 2.9104e-04
Loss = 1.0528e-01, PNorm = 81.2977, GNorm = 0.5193, lr_0 = 2.9084e-04
Loss = 1.0280e-01, PNorm = 81.3044, GNorm = 0.6284, lr_0 = 2.9065e-04
Loss = 9.5273e-02, PNorm = 81.3115, GNorm = 0.5025, lr_0 = 2.9045e-04
Loss = 9.6804e-02, PNorm = 81.3178, GNorm = 0.8887, lr_0 = 2.9025e-04
Loss = 8.5212e-02, PNorm = 81.3242, GNorm = 0.7330, lr_0 = 2.9005e-04
Loss = 1.0574e-01, PNorm = 81.3301, GNorm = 1.0708, lr_0 = 2.8985e-04
Loss = 1.0106e-01, PNorm = 81.3351, GNorm = 0.5391, lr_0 = 2.8965e-04
Loss = 1.0068e-01, PNorm = 81.3437, GNorm = 0.7111, lr_0 = 2.8945e-04
Loss = 9.2877e-02, PNorm = 81.3502, GNorm = 0.7819, lr_0 = 2.8925e-04
Loss = 9.5924e-02, PNorm = 81.3556, GNorm = 0.7770, lr_0 = 2.8906e-04
Loss = 9.6893e-02, PNorm = 81.3612, GNorm = 0.5492, lr_0 = 2.8886e-04
Loss = 1.0538e-01, PNorm = 81.3674, GNorm = 0.7237, lr_0 = 2.8866e-04
Loss = 1.0052e-01, PNorm = 81.3711, GNorm = 0.5668, lr_0 = 2.8846e-04
Loss = 9.5642e-02, PNorm = 81.3784, GNorm = 0.5951, lr_0 = 2.8826e-04
Loss = 8.2864e-02, PNorm = 81.3814, GNorm = 0.6076, lr_0 = 2.8807e-04
Loss = 1.1557e-01, PNorm = 81.3852, GNorm = 0.8610, lr_0 = 2.8787e-04
Loss = 1.1060e-01, PNorm = 81.3957, GNorm = 0.7285, lr_0 = 2.8767e-04
Loss = 1.0396e-01, PNorm = 81.4018, GNorm = 0.6171, lr_0 = 2.8748e-04
Loss = 1.0333e-01, PNorm = 81.4082, GNorm = 0.9420, lr_0 = 2.8728e-04
Loss = 1.0028e-01, PNorm = 81.4088, GNorm = 0.8394, lr_0 = 2.8708e-04
Loss = 1.0311e-01, PNorm = 81.4151, GNorm = 1.0953, lr_0 = 2.8689e-04
Loss = 9.6817e-02, PNorm = 81.4214, GNorm = 0.7917, lr_0 = 2.8669e-04
Loss = 9.7414e-02, PNorm = 81.4296, GNorm = 0.5642, lr_0 = 2.8649e-04
Loss = 9.8089e-02, PNorm = 81.4334, GNorm = 0.5936, lr_0 = 2.8630e-04
Loss = 8.9233e-02, PNorm = 81.4352, GNorm = 0.6793, lr_0 = 2.8610e-04
Loss = 9.9453e-02, PNorm = 81.4395, GNorm = 0.5567, lr_0 = 2.8590e-04
Loss = 9.9997e-02, PNorm = 81.4456, GNorm = 0.8069, lr_0 = 2.8571e-04
Loss = 9.6317e-02, PNorm = 81.4520, GNorm = 0.6300, lr_0 = 2.8551e-04
Loss = 8.6891e-02, PNorm = 81.4596, GNorm = 0.5126, lr_0 = 2.8532e-04
Loss = 8.9022e-02, PNorm = 81.4636, GNorm = 0.8352, lr_0 = 2.8512e-04
Loss = 1.0093e-01, PNorm = 81.4679, GNorm = 0.9331, lr_0 = 2.8493e-04
Loss = 1.1875e-01, PNorm = 81.4753, GNorm = 0.6613, lr_0 = 2.8473e-04
Loss = 8.5801e-02, PNorm = 81.4836, GNorm = 0.4283, lr_0 = 2.8454e-04
Loss = 1.0780e-01, PNorm = 81.4876, GNorm = 0.7174, lr_0 = 2.8434e-04
Loss = 9.9591e-02, PNorm = 81.4929, GNorm = 0.4950, lr_0 = 2.8415e-04
Loss = 9.8205e-02, PNorm = 81.5001, GNorm = 0.6513, lr_0 = 2.8395e-04
Loss = 1.0101e-01, PNorm = 81.5055, GNorm = 0.7190, lr_0 = 2.8376e-04
Loss = 9.8734e-02, PNorm = 81.5097, GNorm = 1.0462, lr_0 = 2.8356e-04
Loss = 1.0276e-01, PNorm = 81.5135, GNorm = 0.5622, lr_0 = 2.8337e-04
Loss = 9.7251e-02, PNorm = 81.5221, GNorm = 0.7144, lr_0 = 2.8317e-04
Loss = 1.0140e-01, PNorm = 81.5271, GNorm = 0.5571, lr_0 = 2.8298e-04
Loss = 9.6065e-02, PNorm = 81.5271, GNorm = 0.9494, lr_0 = 2.8279e-04
Loss = 1.0231e-01, PNorm = 81.5342, GNorm = 0.7893, lr_0 = 2.8259e-04
Loss = 9.0867e-02, PNorm = 81.5402, GNorm = 0.4640, lr_0 = 2.8240e-04
Loss = 9.8214e-02, PNorm = 81.5473, GNorm = 0.7357, lr_0 = 2.8221e-04
Loss = 9.3747e-02, PNorm = 81.5545, GNorm = 0.6290, lr_0 = 2.8201e-04
Loss = 9.4447e-02, PNorm = 81.5596, GNorm = 0.5677, lr_0 = 2.8182e-04
Loss = 9.2272e-02, PNorm = 81.5670, GNorm = 0.8217, lr_0 = 2.8163e-04
Loss = 9.5671e-02, PNorm = 81.5747, GNorm = 0.6394, lr_0 = 2.8143e-04
Loss = 1.0633e-01, PNorm = 81.5813, GNorm = 0.9365, lr_0 = 2.8124e-04
Loss = 1.0839e-01, PNorm = 81.5883, GNorm = 0.7848, lr_0 = 2.8105e-04
Loss = 9.7152e-02, PNorm = 81.5969, GNorm = 1.1712, lr_0 = 2.8085e-04
Loss = 1.0432e-01, PNorm = 81.5999, GNorm = 0.7210, lr_0 = 2.8066e-04
Loss = 9.9208e-02, PNorm = 81.6058, GNorm = 0.6030, lr_0 = 2.8047e-04
Loss = 1.1078e-01, PNorm = 81.6095, GNorm = 0.5234, lr_0 = 2.8028e-04
Loss = 1.0385e-01, PNorm = 81.6144, GNorm = 0.7012, lr_0 = 2.8009e-04
Loss = 8.0505e-02, PNorm = 81.6193, GNorm = 0.4751, lr_0 = 2.7989e-04
Loss = 1.0506e-01, PNorm = 81.6222, GNorm = 0.6513, lr_0 = 2.7970e-04
Loss = 9.3664e-02, PNorm = 81.6221, GNorm = 0.6869, lr_0 = 2.7951e-04
Loss = 1.1507e-01, PNorm = 81.6285, GNorm = 0.6632, lr_0 = 2.7932e-04
Loss = 1.0237e-01, PNorm = 81.6350, GNorm = 0.6745, lr_0 = 2.7913e-04
Loss = 1.0062e-01, PNorm = 81.6402, GNorm = 1.0713, lr_0 = 2.7894e-04
Loss = 9.3767e-02, PNorm = 81.6452, GNorm = 0.7872, lr_0 = 2.7875e-04
Loss = 1.0081e-01, PNorm = 81.6562, GNorm = 0.8682, lr_0 = 2.7855e-04
Loss = 1.2164e-01, PNorm = 81.6627, GNorm = 0.7292, lr_0 = 2.7836e-04
Loss = 9.8961e-02, PNorm = 81.6666, GNorm = 0.8614, lr_0 = 2.7817e-04
Loss = 9.6269e-02, PNorm = 81.6723, GNorm = 0.5481, lr_0 = 2.7798e-04
Loss = 8.1096e-02, PNorm = 81.6782, GNorm = 0.4898, lr_0 = 2.7779e-04
Loss = 9.8043e-02, PNorm = 81.6821, GNorm = 0.6137, lr_0 = 2.7760e-04
Loss = 9.8607e-02, PNorm = 81.6842, GNorm = 0.7820, lr_0 = 2.7741e-04
Loss = 9.6589e-02, PNorm = 81.6883, GNorm = 0.6248, lr_0 = 2.7722e-04
Loss = 1.0973e-01, PNorm = 81.6950, GNorm = 0.6895, lr_0 = 2.7703e-04
Loss = 1.0768e-01, PNorm = 81.6976, GNorm = 0.9259, lr_0 = 2.7684e-04
Loss = 1.0406e-01, PNorm = 81.7040, GNorm = 0.6414, lr_0 = 2.7665e-04
Loss = 9.8725e-02, PNorm = 81.7064, GNorm = 0.6653, lr_0 = 2.7646e-04
Loss = 1.1174e-01, PNorm = 81.7092, GNorm = 0.5386, lr_0 = 2.7627e-04
Loss = 1.0671e-01, PNorm = 81.7151, GNorm = 0.7418, lr_0 = 2.7608e-04
Loss = 1.0425e-01, PNorm = 81.7204, GNorm = 1.1232, lr_0 = 2.7590e-04
Loss = 8.5674e-02, PNorm = 81.7271, GNorm = 0.6735, lr_0 = 2.7571e-04
Loss = 9.2190e-02, PNorm = 81.7285, GNorm = 0.7449, lr_0 = 2.7552e-04
Loss = 8.9396e-02, PNorm = 81.7360, GNorm = 0.5864, lr_0 = 2.7533e-04
Loss = 1.1264e-01, PNorm = 81.7399, GNorm = 0.7148, lr_0 = 2.7514e-04
Loss = 9.8822e-02, PNorm = 81.7469, GNorm = 0.7237, lr_0 = 2.7495e-04
Loss = 1.1403e-01, PNorm = 81.7515, GNorm = 1.1660, lr_0 = 2.7476e-04
Loss = 1.0002e-01, PNorm = 81.7520, GNorm = 0.6939, lr_0 = 2.7457e-04
Loss = 9.4688e-02, PNorm = 81.7549, GNorm = 0.7452, lr_0 = 2.7439e-04
Loss = 1.0965e-01, PNorm = 81.7587, GNorm = 0.5360, lr_0 = 2.7420e-04
Loss = 1.0578e-01, PNorm = 81.7625, GNorm = 0.5476, lr_0 = 2.7401e-04
Loss = 9.9442e-02, PNorm = 81.7694, GNorm = 0.6827, lr_0 = 2.7382e-04
Loss = 9.8194e-02, PNorm = 81.7769, GNorm = 0.6231, lr_0 = 2.7364e-04
Loss = 9.8255e-02, PNorm = 81.7824, GNorm = 0.7136, lr_0 = 2.7345e-04
Loss = 9.4965e-02, PNorm = 81.7888, GNorm = 0.7581, lr_0 = 2.7326e-04
Loss = 9.3830e-02, PNorm = 81.7941, GNorm = 0.6222, lr_0 = 2.7307e-04
Loss = 1.0177e-01, PNorm = 81.7997, GNorm = 0.5248, lr_0 = 2.7289e-04
Loss = 1.0002e-01, PNorm = 81.8055, GNorm = 0.6882, lr_0 = 2.7270e-04
Loss = 1.1963e-01, PNorm = 81.8129, GNorm = 0.7299, lr_0 = 2.7251e-04
Loss = 1.0606e-01, PNorm = 81.8199, GNorm = 0.6606, lr_0 = 2.7233e-04
Loss = 9.5399e-02, PNorm = 81.8245, GNorm = 0.5917, lr_0 = 2.7214e-04
Loss = 1.1279e-01, PNorm = 81.8271, GNorm = 1.0082, lr_0 = 2.7195e-04
Loss = 1.0001e-01, PNorm = 81.8346, GNorm = 0.5999, lr_0 = 2.7177e-04
Loss = 1.1015e-01, PNorm = 81.8387, GNorm = 0.5004, lr_0 = 2.7158e-04
Loss = 9.8702e-02, PNorm = 81.8475, GNorm = 0.5926, lr_0 = 2.7139e-04
Loss = 1.0442e-01, PNorm = 81.8535, GNorm = 0.6017, lr_0 = 2.7121e-04
Loss = 1.1166e-01, PNorm = 81.8579, GNorm = 1.0285, lr_0 = 2.7102e-04
Loss = 9.8195e-02, PNorm = 81.8604, GNorm = 0.5786, lr_0 = 2.7084e-04
Loss = 1.1078e-01, PNorm = 81.8611, GNorm = 0.7892, lr_0 = 2.7065e-04
Loss = 1.0279e-01, PNorm = 81.8671, GNorm = 0.5444, lr_0 = 2.7047e-04
Loss = 1.0340e-01, PNorm = 81.8725, GNorm = 0.5822, lr_0 = 2.7028e-04
Loss = 8.9810e-02, PNorm = 81.8783, GNorm = 0.5520, lr_0 = 2.7010e-04
Loss = 1.0260e-01, PNorm = 81.8843, GNorm = 0.5440, lr_0 = 2.6991e-04
Loss = 9.0624e-02, PNorm = 81.8914, GNorm = 0.5097, lr_0 = 2.6973e-04
Loss = 1.0289e-01, PNorm = 81.8969, GNorm = 0.6347, lr_0 = 2.6954e-04
Loss = 9.5846e-02, PNorm = 81.9041, GNorm = 0.9398, lr_0 = 2.6936e-04
Loss = 9.7762e-02, PNorm = 81.9102, GNorm = 0.6221, lr_0 = 2.6917e-04
Loss = 9.6909e-02, PNorm = 81.9168, GNorm = 0.5794, lr_0 = 2.6899e-04
Loss = 9.6434e-02, PNorm = 81.9241, GNorm = 0.6031, lr_0 = 2.6880e-04
Loss = 1.0180e-01, PNorm = 81.9296, GNorm = 0.6482, lr_0 = 2.6862e-04
Loss = 9.9921e-02, PNorm = 81.9355, GNorm = 0.5458, lr_0 = 2.6844e-04
Loss = 9.6368e-02, PNorm = 81.9394, GNorm = 0.5387, lr_0 = 2.6825e-04
Validation mae = 0.232503
Epoch 18
Loss = 8.6390e-02, PNorm = 81.9431, GNorm = 0.5817, lr_0 = 2.6807e-04
Loss = 1.0579e-01, PNorm = 81.9485, GNorm = 0.5079, lr_0 = 2.6788e-04
Loss = 9.2271e-02, PNorm = 81.9523, GNorm = 0.8044, lr_0 = 2.6770e-04
Loss = 8.5982e-02, PNorm = 81.9572, GNorm = 0.7051, lr_0 = 2.6752e-04
Loss = 9.8301e-02, PNorm = 81.9622, GNorm = 0.7656, lr_0 = 2.6733e-04
Loss = 9.2784e-02, PNorm = 81.9663, GNorm = 0.5802, lr_0 = 2.6715e-04
Loss = 9.3491e-02, PNorm = 81.9711, GNorm = 0.7528, lr_0 = 2.6697e-04
Loss = 9.8204e-02, PNorm = 81.9744, GNorm = 0.6192, lr_0 = 2.6678e-04
Loss = 8.2310e-02, PNorm = 81.9813, GNorm = 0.5214, lr_0 = 2.6660e-04
Loss = 1.0685e-01, PNorm = 81.9884, GNorm = 0.5278, lr_0 = 2.6642e-04
Loss = 9.1829e-02, PNorm = 81.9946, GNorm = 0.8845, lr_0 = 2.6624e-04
Loss = 1.0213e-01, PNorm = 82.0011, GNorm = 0.8247, lr_0 = 2.6605e-04
Loss = 1.0272e-01, PNorm = 82.0081, GNorm = 0.6089, lr_0 = 2.6587e-04
Loss = 9.0757e-02, PNorm = 82.0147, GNorm = 0.6038, lr_0 = 2.6569e-04
Loss = 7.9461e-02, PNorm = 82.0214, GNorm = 0.5837, lr_0 = 2.6551e-04
Loss = 9.9485e-02, PNorm = 82.0263, GNorm = 0.6194, lr_0 = 2.6533e-04
Loss = 8.8633e-02, PNorm = 82.0296, GNorm = 0.6380, lr_0 = 2.6514e-04
Loss = 9.2445e-02, PNorm = 82.0333, GNorm = 0.6483, lr_0 = 2.6496e-04
Loss = 8.1904e-02, PNorm = 82.0390, GNorm = 0.6032, lr_0 = 2.6478e-04
Loss = 8.3384e-02, PNorm = 82.0452, GNorm = 1.0962, lr_0 = 2.6460e-04
Loss = 8.5552e-02, PNorm = 82.0502, GNorm = 0.7047, lr_0 = 2.6442e-04
Loss = 9.0967e-02, PNorm = 82.0539, GNorm = 0.6003, lr_0 = 2.6424e-04
Loss = 1.0540e-01, PNorm = 82.0591, GNorm = 0.6086, lr_0 = 2.6406e-04
Loss = 9.5392e-02, PNorm = 82.0646, GNorm = 0.7375, lr_0 = 2.6388e-04
Loss = 8.8066e-02, PNorm = 82.0708, GNorm = 0.7139, lr_0 = 2.6369e-04
Loss = 9.6938e-02, PNorm = 82.0770, GNorm = 0.6353, lr_0 = 2.6351e-04
Loss = 1.0522e-01, PNorm = 82.0806, GNorm = 0.4271, lr_0 = 2.6333e-04
Loss = 8.4028e-02, PNorm = 82.0847, GNorm = 0.5847, lr_0 = 2.6315e-04
Loss = 1.0739e-01, PNorm = 82.0884, GNorm = 0.7020, lr_0 = 2.6297e-04
Loss = 9.0851e-02, PNorm = 82.0910, GNorm = 0.4696, lr_0 = 2.6279e-04
Loss = 8.6874e-02, PNorm = 82.0968, GNorm = 0.5904, lr_0 = 2.6261e-04
Loss = 1.0455e-01, PNorm = 82.1019, GNorm = 0.9449, lr_0 = 2.6243e-04
Loss = 9.4107e-02, PNorm = 82.1065, GNorm = 0.6461, lr_0 = 2.6225e-04
Loss = 9.7289e-02, PNorm = 82.1140, GNorm = 0.6910, lr_0 = 2.6207e-04
Loss = 9.4429e-02, PNorm = 82.1187, GNorm = 0.6955, lr_0 = 2.6189e-04
Loss = 8.3965e-02, PNorm = 82.1221, GNorm = 0.5737, lr_0 = 2.6171e-04
Loss = 9.9690e-02, PNorm = 82.1312, GNorm = 0.6954, lr_0 = 2.6153e-04
Loss = 9.9002e-02, PNorm = 82.1379, GNorm = 0.6267, lr_0 = 2.6136e-04
Loss = 9.8321e-02, PNorm = 82.1437, GNorm = 0.7535, lr_0 = 2.6118e-04
Loss = 1.0638e-01, PNorm = 82.1502, GNorm = 0.5821, lr_0 = 2.6100e-04
Loss = 9.8192e-02, PNorm = 82.1572, GNorm = 0.5637, lr_0 = 2.6082e-04
Loss = 9.5701e-02, PNorm = 82.1631, GNorm = 0.6145, lr_0 = 2.6064e-04
Loss = 9.6342e-02, PNorm = 82.1673, GNorm = 0.5515, lr_0 = 2.6046e-04
Loss = 8.8173e-02, PNorm = 82.1700, GNorm = 0.6001, lr_0 = 2.6028e-04
Loss = 9.8330e-02, PNorm = 82.1745, GNorm = 0.6192, lr_0 = 2.6011e-04
Loss = 8.7985e-02, PNorm = 82.1787, GNorm = 0.5403, lr_0 = 2.5993e-04
Loss = 1.1357e-01, PNorm = 82.1847, GNorm = 1.0781, lr_0 = 2.5975e-04
Loss = 1.0226e-01, PNorm = 82.1893, GNorm = 1.0521, lr_0 = 2.5957e-04
Loss = 1.0593e-01, PNorm = 82.1965, GNorm = 0.7235, lr_0 = 2.5939e-04
Loss = 8.4112e-02, PNorm = 82.2032, GNorm = 0.5762, lr_0 = 2.5922e-04
Loss = 8.4878e-02, PNorm = 82.2080, GNorm = 0.6274, lr_0 = 2.5904e-04
Loss = 1.0035e-01, PNorm = 82.2120, GNorm = 1.1542, lr_0 = 2.5886e-04
Loss = 7.8593e-02, PNorm = 82.2165, GNorm = 0.5358, lr_0 = 2.5868e-04
Loss = 1.1115e-01, PNorm = 82.2202, GNorm = 0.7123, lr_0 = 2.5851e-04
Loss = 1.0549e-01, PNorm = 82.2265, GNorm = 0.6860, lr_0 = 2.5833e-04
Loss = 9.6001e-02, PNorm = 82.2317, GNorm = 0.6666, lr_0 = 2.5815e-04
Loss = 1.0262e-01, PNorm = 82.2345, GNorm = 0.7745, lr_0 = 2.5797e-04
Loss = 1.0386e-01, PNorm = 82.2409, GNorm = 0.5713, lr_0 = 2.5780e-04
Loss = 1.0139e-01, PNorm = 82.2457, GNorm = 0.8573, lr_0 = 2.5762e-04
Loss = 1.0643e-01, PNorm = 82.2504, GNorm = 0.6078, lr_0 = 2.5745e-04
Loss = 9.5148e-02, PNorm = 82.2562, GNorm = 0.5288, lr_0 = 2.5727e-04
Loss = 9.9722e-02, PNorm = 82.2623, GNorm = 0.6396, lr_0 = 2.5709e-04
Loss = 1.0423e-01, PNorm = 82.2683, GNorm = 0.7861, lr_0 = 2.5692e-04
Loss = 1.0823e-01, PNorm = 82.2765, GNorm = 0.8221, lr_0 = 2.5674e-04
Loss = 9.0660e-02, PNorm = 82.2802, GNorm = 0.5289, lr_0 = 2.5656e-04
Loss = 9.1462e-02, PNorm = 82.2864, GNorm = 0.6384, lr_0 = 2.5639e-04
Loss = 9.3306e-02, PNorm = 82.2925, GNorm = 0.5931, lr_0 = 2.5621e-04
Loss = 1.0917e-01, PNorm = 82.2944, GNorm = 0.7694, lr_0 = 2.5604e-04
Loss = 1.0228e-01, PNorm = 82.2997, GNorm = 0.7530, lr_0 = 2.5586e-04
Loss = 1.0005e-01, PNorm = 82.3031, GNorm = 0.8113, lr_0 = 2.5569e-04
Loss = 8.5442e-02, PNorm = 82.3061, GNorm = 0.7248, lr_0 = 2.5551e-04
Loss = 1.0913e-01, PNorm = 82.3099, GNorm = 0.6930, lr_0 = 2.5534e-04
Loss = 9.1293e-02, PNorm = 82.3134, GNorm = 0.6454, lr_0 = 2.5516e-04
Loss = 9.8759e-02, PNorm = 82.3172, GNorm = 0.6843, lr_0 = 2.5499e-04
Loss = 1.0345e-01, PNorm = 82.3201, GNorm = 0.9461, lr_0 = 2.5481e-04
Loss = 9.3908e-02, PNorm = 82.3244, GNorm = 0.6533, lr_0 = 2.5464e-04
Loss = 1.0027e-01, PNorm = 82.3278, GNorm = 0.8331, lr_0 = 2.5446e-04
Loss = 1.0003e-01, PNorm = 82.3317, GNorm = 0.4081, lr_0 = 2.5429e-04
Loss = 9.2523e-02, PNorm = 82.3371, GNorm = 0.7784, lr_0 = 2.5411e-04
Loss = 9.8436e-02, PNorm = 82.3394, GNorm = 0.6438, lr_0 = 2.5394e-04
Loss = 9.5280e-02, PNorm = 82.3443, GNorm = 0.9441, lr_0 = 2.5377e-04
Loss = 1.2542e-01, PNorm = 82.3484, GNorm = 0.6024, lr_0 = 2.5359e-04
Loss = 9.6060e-02, PNorm = 82.3537, GNorm = 0.7636, lr_0 = 2.5342e-04
Loss = 1.0297e-01, PNorm = 82.3644, GNorm = 0.6592, lr_0 = 2.5325e-04
Loss = 9.5819e-02, PNorm = 82.3702, GNorm = 0.9427, lr_0 = 2.5307e-04
Loss = 8.7908e-02, PNorm = 82.3747, GNorm = 0.6197, lr_0 = 2.5290e-04
Loss = 1.0500e-01, PNorm = 82.3783, GNorm = 0.6911, lr_0 = 2.5273e-04
Loss = 8.8354e-02, PNorm = 82.3814, GNorm = 0.6259, lr_0 = 2.5255e-04
Loss = 1.0065e-01, PNorm = 82.3870, GNorm = 0.9737, lr_0 = 2.5238e-04
Loss = 1.0081e-01, PNorm = 82.3933, GNorm = 0.5955, lr_0 = 2.5221e-04
Loss = 1.0095e-01, PNorm = 82.3985, GNorm = 0.5125, lr_0 = 2.5203e-04
Loss = 1.0088e-01, PNorm = 82.4051, GNorm = 0.7352, lr_0 = 2.5186e-04
Loss = 9.2060e-02, PNorm = 82.4061, GNorm = 0.7870, lr_0 = 2.5169e-04
Loss = 9.6431e-02, PNorm = 82.4099, GNorm = 0.4861, lr_0 = 2.5152e-04
Loss = 9.0304e-02, PNorm = 82.4143, GNorm = 0.7704, lr_0 = 2.5134e-04
Loss = 9.3982e-02, PNorm = 82.4200, GNorm = 0.6259, lr_0 = 2.5117e-04
Loss = 9.4943e-02, PNorm = 82.4261, GNorm = 0.7977, lr_0 = 2.5100e-04
Loss = 8.7111e-02, PNorm = 82.4299, GNorm = 0.5996, lr_0 = 2.5083e-04
Loss = 9.4019e-02, PNorm = 82.4326, GNorm = 0.7425, lr_0 = 2.5066e-04
Loss = 9.6062e-02, PNorm = 82.4369, GNorm = 0.5190, lr_0 = 2.5048e-04
Loss = 1.0351e-01, PNorm = 82.4440, GNorm = 0.5588, lr_0 = 2.5031e-04
Loss = 1.0616e-01, PNorm = 82.4465, GNorm = 0.6805, lr_0 = 2.5014e-04
Loss = 9.7809e-02, PNorm = 82.4506, GNorm = 0.6796, lr_0 = 2.4997e-04
Loss = 8.0175e-02, PNorm = 82.4513, GNorm = 0.9353, lr_0 = 2.4980e-04
Loss = 8.8603e-02, PNorm = 82.4553, GNorm = 0.8494, lr_0 = 2.4963e-04
Loss = 1.1843e-01, PNorm = 82.4603, GNorm = 0.7274, lr_0 = 2.4946e-04
Loss = 1.0989e-01, PNorm = 82.4637, GNorm = 0.6841, lr_0 = 2.4929e-04
Loss = 9.3362e-02, PNorm = 82.4703, GNorm = 0.6171, lr_0 = 2.4911e-04
Loss = 9.1726e-02, PNorm = 82.4763, GNorm = 0.7068, lr_0 = 2.4894e-04
Loss = 9.3519e-02, PNorm = 82.4811, GNorm = 0.5568, lr_0 = 2.4877e-04
Loss = 9.4600e-02, PNorm = 82.4883, GNorm = 0.5513, lr_0 = 2.4860e-04
Loss = 1.0606e-01, PNorm = 82.4933, GNorm = 0.6316, lr_0 = 2.4843e-04
Loss = 9.5772e-02, PNorm = 82.4975, GNorm = 0.7178, lr_0 = 2.4826e-04
Loss = 1.2381e-01, PNorm = 82.5063, GNorm = 0.7332, lr_0 = 2.4809e-04
Loss = 9.4249e-02, PNorm = 82.5134, GNorm = 0.5490, lr_0 = 2.4792e-04
Loss = 1.0460e-01, PNorm = 82.5164, GNorm = 0.9428, lr_0 = 2.4775e-04
Loss = 9.7190e-02, PNorm = 82.5208, GNorm = 0.4907, lr_0 = 2.4758e-04
Loss = 9.7712e-02, PNorm = 82.5274, GNorm = 0.5883, lr_0 = 2.4741e-04
Loss = 9.6506e-02, PNorm = 82.5317, GNorm = 0.5559, lr_0 = 2.4724e-04
Loss = 1.0615e-01, PNorm = 82.5381, GNorm = 0.6799, lr_0 = 2.4707e-04
Validation mae = 0.228571
Epoch 19
Loss = 8.3376e-02, PNorm = 82.5422, GNorm = 0.6314, lr_0 = 2.4690e-04
Loss = 9.0849e-02, PNorm = 82.5441, GNorm = 0.5508, lr_0 = 2.4674e-04
Loss = 9.5780e-02, PNorm = 82.5465, GNorm = 0.5720, lr_0 = 2.4657e-04
Loss = 9.0630e-02, PNorm = 82.5518, GNorm = 0.4999, lr_0 = 2.4640e-04
Loss = 9.9902e-02, PNorm = 82.5578, GNorm = 0.6524, lr_0 = 2.4623e-04
Loss = 8.9051e-02, PNorm = 82.5653, GNorm = 0.5480, lr_0 = 2.4606e-04
Loss = 1.0319e-01, PNorm = 82.5695, GNorm = 0.5473, lr_0 = 2.4589e-04
Loss = 9.9406e-02, PNorm = 82.5731, GNorm = 0.7048, lr_0 = 2.4572e-04
Loss = 1.0727e-01, PNorm = 82.5834, GNorm = 0.6091, lr_0 = 2.4556e-04
Loss = 9.7284e-02, PNorm = 82.5908, GNorm = 0.9967, lr_0 = 2.4539e-04
Loss = 8.3867e-02, PNorm = 82.5945, GNorm = 0.8024, lr_0 = 2.4522e-04
Loss = 9.3651e-02, PNorm = 82.5969, GNorm = 0.5343, lr_0 = 2.4505e-04
Loss = 7.8486e-02, PNorm = 82.6008, GNorm = 0.3684, lr_0 = 2.4488e-04
Loss = 8.8947e-02, PNorm = 82.6047, GNorm = 0.5773, lr_0 = 2.4472e-04
Loss = 8.9994e-02, PNorm = 82.6082, GNorm = 0.8152, lr_0 = 2.4455e-04
Loss = 8.9231e-02, PNorm = 82.6103, GNorm = 0.7089, lr_0 = 2.4438e-04
Loss = 8.5770e-02, PNorm = 82.6139, GNorm = 0.5339, lr_0 = 2.4421e-04
Loss = 8.7740e-02, PNorm = 82.6169, GNorm = 0.5441, lr_0 = 2.4405e-04
Loss = 7.7002e-02, PNorm = 82.6230, GNorm = 0.4596, lr_0 = 2.4388e-04
Loss = 9.1362e-02, PNorm = 82.6278, GNorm = 0.8607, lr_0 = 2.4371e-04
Loss = 9.1729e-02, PNorm = 82.6343, GNorm = 0.5866, lr_0 = 2.4354e-04
Loss = 9.5086e-02, PNorm = 82.6378, GNorm = 0.8921, lr_0 = 2.4338e-04
Loss = 1.0842e-01, PNorm = 82.6422, GNorm = 0.6523, lr_0 = 2.4321e-04
Loss = 9.2427e-02, PNorm = 82.6488, GNorm = 0.6522, lr_0 = 2.4304e-04
Loss = 1.0165e-01, PNorm = 82.6536, GNorm = 0.7521, lr_0 = 2.4288e-04
Loss = 9.1771e-02, PNorm = 82.6580, GNorm = 0.5248, lr_0 = 2.4271e-04
Loss = 9.6667e-02, PNorm = 82.6639, GNorm = 0.8933, lr_0 = 2.4254e-04
Loss = 1.0027e-01, PNorm = 82.6667, GNorm = 0.6251, lr_0 = 2.4238e-04
Loss = 9.8529e-02, PNorm = 82.6719, GNorm = 0.7137, lr_0 = 2.4221e-04
Loss = 9.1723e-02, PNorm = 82.6777, GNorm = 0.5701, lr_0 = 2.4205e-04
Loss = 9.7495e-02, PNorm = 82.6828, GNorm = 0.8083, lr_0 = 2.4188e-04
Loss = 9.8061e-02, PNorm = 82.6860, GNorm = 0.6302, lr_0 = 2.4171e-04
Loss = 8.7238e-02, PNorm = 82.6916, GNorm = 0.4946, lr_0 = 2.4155e-04
Loss = 9.4854e-02, PNorm = 82.6961, GNorm = 0.7638, lr_0 = 2.4138e-04
Loss = 1.0559e-01, PNorm = 82.6994, GNorm = 0.5314, lr_0 = 2.4122e-04
Loss = 8.7055e-02, PNorm = 82.7043, GNorm = 0.6432, lr_0 = 2.4105e-04
Loss = 9.4199e-02, PNorm = 82.7085, GNorm = 0.6305, lr_0 = 2.4089e-04
Loss = 1.0144e-01, PNorm = 82.7131, GNorm = 0.7180, lr_0 = 2.4072e-04
Loss = 9.1090e-02, PNorm = 82.7128, GNorm = 0.6770, lr_0 = 2.4056e-04
Loss = 9.9308e-02, PNorm = 82.7159, GNorm = 0.7180, lr_0 = 2.4039e-04
Loss = 9.7063e-02, PNorm = 82.7207, GNorm = 0.6631, lr_0 = 2.4023e-04
Loss = 8.2431e-02, PNorm = 82.7250, GNorm = 0.4108, lr_0 = 2.4006e-04
Loss = 9.6024e-02, PNorm = 82.7284, GNorm = 0.6908, lr_0 = 2.3990e-04
Loss = 9.5423e-02, PNorm = 82.7320, GNorm = 0.7593, lr_0 = 2.3974e-04
Loss = 9.7212e-02, PNorm = 82.7361, GNorm = 0.7044, lr_0 = 2.3957e-04
Loss = 9.2259e-02, PNorm = 82.7424, GNorm = 0.8658, lr_0 = 2.3941e-04
Loss = 8.3773e-02, PNorm = 82.7477, GNorm = 0.6247, lr_0 = 2.3924e-04
Loss = 1.0205e-01, PNorm = 82.7518, GNorm = 1.3937, lr_0 = 2.3908e-04
Loss = 8.9446e-02, PNorm = 82.7593, GNorm = 0.7460, lr_0 = 2.3892e-04
Loss = 8.4022e-02, PNorm = 82.7658, GNorm = 0.5372, lr_0 = 2.3875e-04
Loss = 9.2363e-02, PNorm = 82.7704, GNorm = 0.5076, lr_0 = 2.3859e-04
Loss = 8.6823e-02, PNorm = 82.7756, GNorm = 0.5559, lr_0 = 2.3842e-04
Loss = 9.1664e-02, PNorm = 82.7793, GNorm = 0.5763, lr_0 = 2.3826e-04
Loss = 1.0254e-01, PNorm = 82.7847, GNorm = 0.7502, lr_0 = 2.3810e-04
Loss = 1.0926e-01, PNorm = 82.7883, GNorm = 0.6539, lr_0 = 2.3794e-04
Loss = 9.6972e-02, PNorm = 82.7935, GNorm = 0.9414, lr_0 = 2.3777e-04
Loss = 8.8368e-02, PNorm = 82.8009, GNorm = 0.6943, lr_0 = 2.3761e-04
Loss = 9.4433e-02, PNorm = 82.8046, GNorm = 0.5744, lr_0 = 2.3745e-04
Loss = 8.4785e-02, PNorm = 82.8111, GNorm = 0.9905, lr_0 = 2.3728e-04
Loss = 9.8925e-02, PNorm = 82.8155, GNorm = 0.7965, lr_0 = 2.3712e-04
Loss = 9.7136e-02, PNorm = 82.8193, GNorm = 0.5883, lr_0 = 2.3696e-04
Loss = 9.8658e-02, PNorm = 82.8241, GNorm = 0.8056, lr_0 = 2.3680e-04
Loss = 9.5787e-02, PNorm = 82.8256, GNorm = 0.6668, lr_0 = 2.3663e-04
Loss = 8.6866e-02, PNorm = 82.8303, GNorm = 0.6577, lr_0 = 2.3647e-04
Loss = 9.4239e-02, PNorm = 82.8354, GNorm = 0.6762, lr_0 = 2.3631e-04
Loss = 8.8977e-02, PNorm = 82.8377, GNorm = 0.5176, lr_0 = 2.3615e-04
Loss = 8.4727e-02, PNorm = 82.8396, GNorm = 0.5960, lr_0 = 2.3599e-04
Loss = 9.2512e-02, PNorm = 82.8429, GNorm = 0.7802, lr_0 = 2.3582e-04
Loss = 9.7562e-02, PNorm = 82.8454, GNorm = 0.6185, lr_0 = 2.3566e-04
Loss = 1.0010e-01, PNorm = 82.8508, GNorm = 0.8001, lr_0 = 2.3550e-04
Loss = 9.7168e-02, PNorm = 82.8561, GNorm = 0.6804, lr_0 = 2.3534e-04
Loss = 9.5974e-02, PNorm = 82.8604, GNorm = 0.6211, lr_0 = 2.3518e-04
Loss = 1.0399e-01, PNorm = 82.8674, GNorm = 1.1419, lr_0 = 2.3502e-04
Loss = 1.0333e-01, PNorm = 82.8720, GNorm = 0.6090, lr_0 = 2.3486e-04
Loss = 8.5658e-02, PNorm = 82.8796, GNorm = 0.5756, lr_0 = 2.3470e-04
Loss = 1.0217e-01, PNorm = 82.8834, GNorm = 0.4663, lr_0 = 2.3454e-04
Loss = 8.8839e-02, PNorm = 82.8891, GNorm = 0.5336, lr_0 = 2.3437e-04
Loss = 1.0013e-01, PNorm = 82.8956, GNorm = 0.8364, lr_0 = 2.3421e-04
Loss = 9.1785e-02, PNorm = 82.8996, GNorm = 0.9621, lr_0 = 2.3405e-04
Loss = 8.7257e-02, PNorm = 82.9025, GNorm = 0.8010, lr_0 = 2.3389e-04
Loss = 1.0296e-01, PNorm = 82.9048, GNorm = 0.5461, lr_0 = 2.3373e-04
Loss = 9.2679e-02, PNorm = 82.9091, GNorm = 0.7521, lr_0 = 2.3357e-04
Loss = 9.2778e-02, PNorm = 82.9111, GNorm = 0.8883, lr_0 = 2.3341e-04
Loss = 9.7648e-02, PNorm = 82.9159, GNorm = 0.5500, lr_0 = 2.3325e-04
Loss = 9.4553e-02, PNorm = 82.9212, GNorm = 0.4382, lr_0 = 2.3309e-04
Loss = 9.9690e-02, PNorm = 82.9251, GNorm = 0.6553, lr_0 = 2.3293e-04
Loss = 8.9476e-02, PNorm = 82.9292, GNorm = 0.7964, lr_0 = 2.3277e-04
Loss = 8.4376e-02, PNorm = 82.9316, GNorm = 0.5149, lr_0 = 2.3261e-04
Loss = 1.0271e-01, PNorm = 82.9370, GNorm = 0.7234, lr_0 = 2.3246e-04
Loss = 1.0752e-01, PNorm = 82.9404, GNorm = 0.7378, lr_0 = 2.3230e-04
Loss = 1.0474e-01, PNorm = 82.9456, GNorm = 0.7506, lr_0 = 2.3214e-04
Loss = 9.9994e-02, PNorm = 82.9514, GNorm = 0.7821, lr_0 = 2.3198e-04
Loss = 1.0601e-01, PNorm = 82.9572, GNorm = 0.6031, lr_0 = 2.3182e-04
Loss = 1.0846e-01, PNorm = 82.9627, GNorm = 0.7295, lr_0 = 2.3166e-04
Loss = 9.8820e-02, PNorm = 82.9666, GNorm = 0.6260, lr_0 = 2.3150e-04
Loss = 8.4903e-02, PNorm = 82.9716, GNorm = 0.5428, lr_0 = 2.3134e-04
Loss = 1.0678e-01, PNorm = 82.9781, GNorm = 0.7485, lr_0 = 2.3118e-04
Loss = 1.0651e-01, PNorm = 82.9850, GNorm = 0.8328, lr_0 = 2.3103e-04
Loss = 9.4187e-02, PNorm = 82.9890, GNorm = 0.6088, lr_0 = 2.3087e-04
Loss = 9.1645e-02, PNorm = 82.9917, GNorm = 0.6547, lr_0 = 2.3071e-04
Loss = 1.0001e-01, PNorm = 82.9956, GNorm = 0.7431, lr_0 = 2.3055e-04
Loss = 1.0142e-01, PNorm = 82.9995, GNorm = 1.1423, lr_0 = 2.3039e-04
Loss = 9.2346e-02, PNorm = 83.0040, GNorm = 0.5178, lr_0 = 2.3024e-04
Loss = 8.0727e-02, PNorm = 83.0068, GNorm = 0.5799, lr_0 = 2.3008e-04
Loss = 8.3831e-02, PNorm = 83.0090, GNorm = 0.4439, lr_0 = 2.2992e-04
Loss = 8.5553e-02, PNorm = 83.0115, GNorm = 0.7781, lr_0 = 2.2976e-04
Loss = 9.4172e-02, PNorm = 83.0149, GNorm = 0.6119, lr_0 = 2.2961e-04
Loss = 9.6091e-02, PNorm = 83.0204, GNorm = 0.6016, lr_0 = 2.2945e-04
Loss = 1.0322e-01, PNorm = 83.0251, GNorm = 0.8723, lr_0 = 2.2929e-04
Loss = 9.0659e-02, PNorm = 83.0290, GNorm = 0.8373, lr_0 = 2.2913e-04
Loss = 9.0625e-02, PNorm = 83.0301, GNorm = 0.7378, lr_0 = 2.2898e-04
Loss = 9.0525e-02, PNorm = 83.0320, GNorm = 0.9615, lr_0 = 2.2882e-04
Loss = 1.0728e-01, PNorm = 83.0368, GNorm = 0.7227, lr_0 = 2.2866e-04
Loss = 9.1264e-02, PNorm = 83.0439, GNorm = 0.5717, lr_0 = 2.2851e-04
Loss = 8.9770e-02, PNorm = 83.0494, GNorm = 0.6237, lr_0 = 2.2835e-04
Loss = 8.5746e-02, PNorm = 83.0523, GNorm = 0.6697, lr_0 = 2.2819e-04
Loss = 9.5759e-02, PNorm = 83.0569, GNorm = 0.6667, lr_0 = 2.2804e-04
Loss = 8.8431e-02, PNorm = 83.0620, GNorm = 0.7382, lr_0 = 2.2788e-04
Loss = 9.3294e-02, PNorm = 83.0634, GNorm = 0.6082, lr_0 = 2.2773e-04
Loss = 1.0172e-01, PNorm = 83.0657, GNorm = 0.6897, lr_0 = 2.2757e-04
Validation mae = 0.227948
Epoch 20
Loss = 9.6044e-02, PNorm = 83.0682, GNorm = 0.7727, lr_0 = 2.2741e-04
Loss = 8.4000e-02, PNorm = 83.0726, GNorm = 0.6351, lr_0 = 2.2726e-04
Loss = 9.5147e-02, PNorm = 83.0778, GNorm = 0.6764, lr_0 = 2.2710e-04
Loss = 9.4074e-02, PNorm = 83.0841, GNorm = 0.5490, lr_0 = 2.2695e-04
Loss = 1.0132e-01, PNorm = 83.0899, GNorm = 0.8098, lr_0 = 2.2679e-04
Loss = 8.9876e-02, PNorm = 83.0949, GNorm = 0.7756, lr_0 = 2.2664e-04
Loss = 8.2797e-02, PNorm = 83.0997, GNorm = 0.7035, lr_0 = 2.2648e-04
Loss = 8.6869e-02, PNorm = 83.1007, GNorm = 0.8604, lr_0 = 2.2632e-04
Loss = 9.2695e-02, PNorm = 83.1044, GNorm = 0.8102, lr_0 = 2.2617e-04
Loss = 9.6594e-02, PNorm = 83.1062, GNorm = 0.6786, lr_0 = 2.2601e-04
Loss = 8.2016e-02, PNorm = 83.1100, GNorm = 0.6191, lr_0 = 2.2586e-04
Loss = 9.5900e-02, PNorm = 83.1107, GNorm = 0.5442, lr_0 = 2.2571e-04
Loss = 9.4481e-02, PNorm = 83.1140, GNorm = 0.7255, lr_0 = 2.2555e-04
Loss = 8.7266e-02, PNorm = 83.1162, GNorm = 0.6290, lr_0 = 2.2540e-04
Loss = 9.1840e-02, PNorm = 83.1205, GNorm = 0.8480, lr_0 = 2.2524e-04
Loss = 7.4629e-02, PNorm = 83.1243, GNorm = 0.6019, lr_0 = 2.2509e-04
Loss = 8.8111e-02, PNorm = 83.1290, GNorm = 0.5508, lr_0 = 2.2493e-04
Loss = 7.8145e-02, PNorm = 83.1307, GNorm = 0.4943, lr_0 = 2.2478e-04
Loss = 8.6188e-02, PNorm = 83.1362, GNorm = 0.9516, lr_0 = 2.2463e-04
Loss = 9.3800e-02, PNorm = 83.1406, GNorm = 0.5203, lr_0 = 2.2447e-04
Loss = 8.4033e-02, PNorm = 83.1464, GNorm = 0.7570, lr_0 = 2.2432e-04
Loss = 9.6265e-02, PNorm = 83.1530, GNorm = 0.4935, lr_0 = 2.2416e-04
Loss = 8.4330e-02, PNorm = 83.1587, GNorm = 0.6317, lr_0 = 2.2401e-04
Loss = 8.4279e-02, PNorm = 83.1623, GNorm = 0.5327, lr_0 = 2.2386e-04
Loss = 8.9937e-02, PNorm = 83.1684, GNorm = 0.8977, lr_0 = 2.2370e-04
Loss = 8.9757e-02, PNorm = 83.1741, GNorm = 0.6268, lr_0 = 2.2355e-04
Loss = 1.0058e-01, PNorm = 83.1791, GNorm = 0.4819, lr_0 = 2.2340e-04
Loss = 9.7923e-02, PNorm = 83.1856, GNorm = 0.5257, lr_0 = 2.2324e-04
Loss = 1.0616e-01, PNorm = 83.1904, GNorm = 0.8999, lr_0 = 2.2309e-04
Loss = 9.3034e-02, PNorm = 83.1916, GNorm = 0.8914, lr_0 = 2.2294e-04
Loss = 8.0002e-02, PNorm = 83.1934, GNorm = 0.4507, lr_0 = 2.2279e-04
Loss = 9.0043e-02, PNorm = 83.1958, GNorm = 0.6758, lr_0 = 2.2263e-04
Loss = 9.5856e-02, PNorm = 83.2018, GNorm = 0.7223, lr_0 = 2.2248e-04
Loss = 8.1646e-02, PNorm = 83.2060, GNorm = 0.7625, lr_0 = 2.2233e-04
Loss = 8.5570e-02, PNorm = 83.2083, GNorm = 0.5877, lr_0 = 2.2218e-04
Loss = 8.5013e-02, PNorm = 83.2121, GNorm = 0.7030, lr_0 = 2.2202e-04
Loss = 8.8759e-02, PNorm = 83.2163, GNorm = 0.7653, lr_0 = 2.2187e-04
Loss = 9.0224e-02, PNorm = 83.2203, GNorm = 0.5681, lr_0 = 2.2172e-04
Loss = 9.2759e-02, PNorm = 83.2241, GNorm = 0.5196, lr_0 = 2.2157e-04
Loss = 1.0444e-01, PNorm = 83.2263, GNorm = 0.7351, lr_0 = 2.2142e-04
Loss = 9.0026e-02, PNorm = 83.2321, GNorm = 0.9067, lr_0 = 2.2126e-04
Loss = 8.1249e-02, PNorm = 83.2371, GNorm = 0.7470, lr_0 = 2.2111e-04
Loss = 9.0065e-02, PNorm = 83.2428, GNorm = 0.6632, lr_0 = 2.2096e-04
Loss = 9.4478e-02, PNorm = 83.2458, GNorm = 0.8187, lr_0 = 2.2081e-04
Loss = 7.8076e-02, PNorm = 83.2497, GNorm = 0.8803, lr_0 = 2.2066e-04
Loss = 1.0550e-01, PNorm = 83.2530, GNorm = 0.6398, lr_0 = 2.2051e-04
Loss = 8.4104e-02, PNorm = 83.2581, GNorm = 0.6347, lr_0 = 2.2036e-04
Loss = 8.6540e-02, PNorm = 83.2607, GNorm = 0.7879, lr_0 = 2.2021e-04
Loss = 9.3981e-02, PNorm = 83.2661, GNorm = 0.6348, lr_0 = 2.2005e-04
Loss = 8.9755e-02, PNorm = 83.2698, GNorm = 0.5673, lr_0 = 2.1990e-04
Loss = 9.5447e-02, PNorm = 83.2738, GNorm = 0.6100, lr_0 = 2.1975e-04
Loss = 9.4720e-02, PNorm = 83.2778, GNorm = 0.5565, lr_0 = 2.1960e-04
Loss = 8.6200e-02, PNorm = 83.2815, GNorm = 0.5086, lr_0 = 2.1945e-04
Loss = 9.5396e-02, PNorm = 83.2857, GNorm = 0.5662, lr_0 = 2.1930e-04
Loss = 1.0205e-01, PNorm = 83.2904, GNorm = 0.5903, lr_0 = 2.1915e-04
Loss = 9.4943e-02, PNorm = 83.2949, GNorm = 0.5941, lr_0 = 2.1900e-04
Loss = 8.0486e-02, PNorm = 83.2997, GNorm = 0.6728, lr_0 = 2.1885e-04
Loss = 8.7172e-02, PNorm = 83.3031, GNorm = 0.6090, lr_0 = 2.1870e-04
Loss = 8.1185e-02, PNorm = 83.3041, GNorm = 0.5042, lr_0 = 2.1855e-04
Loss = 9.9302e-02, PNorm = 83.3067, GNorm = 0.6855, lr_0 = 2.1840e-04
Loss = 1.0261e-01, PNorm = 83.3095, GNorm = 0.8281, lr_0 = 2.1825e-04
Loss = 8.2170e-02, PNorm = 83.3130, GNorm = 0.5380, lr_0 = 2.1810e-04
Loss = 9.3210e-02, PNorm = 83.3182, GNorm = 0.7272, lr_0 = 2.1795e-04
Loss = 9.2201e-02, PNorm = 83.3196, GNorm = 0.9023, lr_0 = 2.1780e-04
Loss = 8.9542e-02, PNorm = 83.3223, GNorm = 0.5819, lr_0 = 2.1765e-04
Loss = 1.0923e-01, PNorm = 83.3242, GNorm = 0.6765, lr_0 = 2.1751e-04
Loss = 1.0016e-01, PNorm = 83.3236, GNorm = 0.7121, lr_0 = 2.1736e-04
Loss = 9.4092e-02, PNorm = 83.3257, GNorm = 0.8878, lr_0 = 2.1721e-04
Loss = 9.8906e-02, PNorm = 83.3311, GNorm = 0.6514, lr_0 = 2.1706e-04
Loss = 9.1348e-02, PNorm = 83.3356, GNorm = 0.6625, lr_0 = 2.1691e-04
Loss = 8.3364e-02, PNorm = 83.3401, GNorm = 0.6377, lr_0 = 2.1676e-04
Loss = 9.7473e-02, PNorm = 83.3440, GNorm = 0.7724, lr_0 = 2.1661e-04
Loss = 9.9981e-02, PNorm = 83.3484, GNorm = 0.4561, lr_0 = 2.1646e-04
Loss = 7.5961e-02, PNorm = 83.3533, GNorm = 0.4980, lr_0 = 2.1632e-04
Loss = 9.4797e-02, PNorm = 83.3575, GNorm = 0.7079, lr_0 = 2.1617e-04
Loss = 9.3675e-02, PNorm = 83.3623, GNorm = 0.8170, lr_0 = 2.1602e-04
Loss = 9.5750e-02, PNorm = 83.3683, GNorm = 0.6585, lr_0 = 2.1587e-04
Loss = 8.6543e-02, PNorm = 83.3734, GNorm = 0.7099, lr_0 = 2.1572e-04
Loss = 9.3718e-02, PNorm = 83.3771, GNorm = 0.7791, lr_0 = 2.1558e-04
Loss = 1.0034e-01, PNorm = 83.3822, GNorm = 0.5850, lr_0 = 2.1543e-04
Loss = 9.0481e-02, PNorm = 83.3866, GNorm = 0.7544, lr_0 = 2.1528e-04
Loss = 9.7574e-02, PNorm = 83.3890, GNorm = 0.9100, lr_0 = 2.1513e-04
Loss = 8.3245e-02, PNorm = 83.3926, GNorm = 0.4343, lr_0 = 2.1499e-04
Loss = 9.9107e-02, PNorm = 83.3958, GNorm = 0.6994, lr_0 = 2.1484e-04
Loss = 8.8533e-02, PNorm = 83.4003, GNorm = 0.5811, lr_0 = 2.1469e-04
Loss = 9.1285e-02, PNorm = 83.4026, GNorm = 0.5253, lr_0 = 2.1454e-04
Loss = 9.2883e-02, PNorm = 83.4046, GNorm = 0.7659, lr_0 = 2.1440e-04
Loss = 8.2746e-02, PNorm = 83.4054, GNorm = 0.7421, lr_0 = 2.1425e-04
Loss = 1.0292e-01, PNorm = 83.4080, GNorm = 0.7657, lr_0 = 2.1410e-04
Loss = 8.9667e-02, PNorm = 83.4126, GNorm = 0.4931, lr_0 = 2.1396e-04
Loss = 8.4186e-02, PNorm = 83.4153, GNorm = 0.8354, lr_0 = 2.1381e-04
Loss = 9.3725e-02, PNorm = 83.4179, GNorm = 0.5898, lr_0 = 2.1366e-04
Loss = 9.5551e-02, PNorm = 83.4221, GNorm = 0.4974, lr_0 = 2.1352e-04
Loss = 1.0649e-01, PNorm = 83.4257, GNorm = 0.6618, lr_0 = 2.1337e-04
Loss = 7.7985e-02, PNorm = 83.4312, GNorm = 0.5500, lr_0 = 2.1323e-04
Loss = 1.0204e-01, PNorm = 83.4354, GNorm = 0.5621, lr_0 = 2.1308e-04
Loss = 9.5866e-02, PNorm = 83.4415, GNorm = 0.6625, lr_0 = 2.1293e-04
Loss = 8.5489e-02, PNorm = 83.4437, GNorm = 0.4067, lr_0 = 2.1279e-04
Loss = 9.1273e-02, PNorm = 83.4492, GNorm = 0.6988, lr_0 = 2.1264e-04
Loss = 1.0086e-01, PNorm = 83.4555, GNorm = 0.5787, lr_0 = 2.1250e-04
Loss = 1.0386e-01, PNorm = 83.4594, GNorm = 0.4957, lr_0 = 2.1235e-04
Loss = 1.0161e-01, PNorm = 83.4626, GNorm = 0.6461, lr_0 = 2.1221e-04
Loss = 8.7179e-02, PNorm = 83.4671, GNorm = 0.5591, lr_0 = 2.1206e-04
Loss = 9.4307e-02, PNorm = 83.4702, GNorm = 0.5635, lr_0 = 2.1191e-04
Loss = 9.5705e-02, PNorm = 83.4740, GNorm = 0.6354, lr_0 = 2.1177e-04
Loss = 9.5586e-02, PNorm = 83.4762, GNorm = 0.6621, lr_0 = 2.1162e-04
Loss = 1.0382e-01, PNorm = 83.4822, GNorm = 0.6809, lr_0 = 2.1148e-04
Loss = 8.5013e-02, PNorm = 83.4868, GNorm = 0.5869, lr_0 = 2.1133e-04
Loss = 1.0102e-01, PNorm = 83.4856, GNorm = 0.7370, lr_0 = 2.1119e-04
Loss = 9.1094e-02, PNorm = 83.4881, GNorm = 0.7767, lr_0 = 2.1104e-04
Loss = 9.9702e-02, PNorm = 83.4940, GNorm = 0.8754, lr_0 = 2.1090e-04
Loss = 8.7480e-02, PNorm = 83.4991, GNorm = 0.7496, lr_0 = 2.1076e-04
Loss = 8.5670e-02, PNorm = 83.5048, GNorm = 1.0111, lr_0 = 2.1061e-04
Loss = 9.4556e-02, PNorm = 83.5074, GNorm = 0.5347, lr_0 = 2.1047e-04
Loss = 9.0933e-02, PNorm = 83.5106, GNorm = 0.4902, lr_0 = 2.1032e-04
Loss = 8.6598e-02, PNorm = 83.5138, GNorm = 0.3799, lr_0 = 2.1018e-04
Loss = 8.6492e-02, PNorm = 83.5173, GNorm = 0.6956, lr_0 = 2.1003e-04
Loss = 9.1888e-02, PNorm = 83.5218, GNorm = 0.5744, lr_0 = 2.0989e-04
Loss = 9.3193e-02, PNorm = 83.5235, GNorm = 0.8357, lr_0 = 2.0975e-04
Loss = 9.5899e-02, PNorm = 83.5277, GNorm = 0.8579, lr_0 = 2.0960e-04
Validation mae = 0.230172
Epoch 21
Loss = 9.1342e-02, PNorm = 83.5323, GNorm = 0.5287, lr_0 = 2.0946e-04
Loss = 8.1547e-02, PNorm = 83.5350, GNorm = 0.6043, lr_0 = 2.0932e-04
Loss = 9.9976e-02, PNorm = 83.5410, GNorm = 0.6647, lr_0 = 2.0917e-04
Loss = 8.9988e-02, PNorm = 83.5439, GNorm = 0.9723, lr_0 = 2.0903e-04
Loss = 1.0246e-01, PNorm = 83.5488, GNorm = 0.8548, lr_0 = 2.0889e-04
Loss = 8.1347e-02, PNorm = 83.5529, GNorm = 0.6332, lr_0 = 2.0874e-04
Loss = 9.8371e-02, PNorm = 83.5591, GNorm = 0.8617, lr_0 = 2.0860e-04
Loss = 9.7419e-02, PNorm = 83.5632, GNorm = 1.1491, lr_0 = 2.0846e-04
Loss = 8.3025e-02, PNorm = 83.5674, GNorm = 0.5992, lr_0 = 2.0831e-04
Loss = 8.4028e-02, PNorm = 83.5713, GNorm = 0.5278, lr_0 = 2.0817e-04
Loss = 1.0392e-01, PNorm = 83.5763, GNorm = 0.8031, lr_0 = 2.0803e-04
Loss = 8.4992e-02, PNorm = 83.5809, GNorm = 0.6700, lr_0 = 2.0789e-04
Loss = 8.1804e-02, PNorm = 83.5860, GNorm = 0.7284, lr_0 = 2.0774e-04
Loss = 9.5422e-02, PNorm = 83.5877, GNorm = 1.0545, lr_0 = 2.0760e-04
Loss = 8.2507e-02, PNorm = 83.5906, GNorm = 0.6265, lr_0 = 2.0746e-04
Loss = 8.8838e-02, PNorm = 83.5933, GNorm = 0.6593, lr_0 = 2.0732e-04
Loss = 8.6310e-02, PNorm = 83.5965, GNorm = 0.5238, lr_0 = 2.0718e-04
Loss = 9.1090e-02, PNorm = 83.5999, GNorm = 0.5081, lr_0 = 2.0703e-04
Loss = 7.3053e-02, PNorm = 83.6044, GNorm = 0.3730, lr_0 = 2.0689e-04
Loss = 9.0148e-02, PNorm = 83.6083, GNorm = 0.5621, lr_0 = 2.0675e-04
Loss = 8.7464e-02, PNorm = 83.6122, GNorm = 0.5802, lr_0 = 2.0661e-04
Loss = 8.6596e-02, PNorm = 83.6132, GNorm = 0.6589, lr_0 = 2.0647e-04
Loss = 8.4810e-02, PNorm = 83.6179, GNorm = 0.5699, lr_0 = 2.0633e-04
Loss = 8.5722e-02, PNorm = 83.6248, GNorm = 0.7528, lr_0 = 2.0618e-04
Loss = 8.6265e-02, PNorm = 83.6276, GNorm = 0.6001, lr_0 = 2.0604e-04
Loss = 9.0159e-02, PNorm = 83.6306, GNorm = 0.6988, lr_0 = 2.0590e-04
Loss = 7.8269e-02, PNorm = 83.6354, GNorm = 0.4873, lr_0 = 2.0576e-04
Loss = 9.5110e-02, PNorm = 83.6386, GNorm = 0.5337, lr_0 = 2.0562e-04
Loss = 8.5122e-02, PNorm = 83.6414, GNorm = 0.6079, lr_0 = 2.0548e-04
Loss = 9.3998e-02, PNorm = 83.6443, GNorm = 0.5774, lr_0 = 2.0534e-04
Loss = 8.9850e-02, PNorm = 83.6478, GNorm = 1.1033, lr_0 = 2.0520e-04
Loss = 9.1093e-02, PNorm = 83.6495, GNorm = 0.5882, lr_0 = 2.0506e-04
Loss = 8.7263e-02, PNorm = 83.6525, GNorm = 0.5211, lr_0 = 2.0492e-04
Loss = 9.5920e-02, PNorm = 83.6571, GNorm = 0.7491, lr_0 = 2.0478e-04
Loss = 8.3684e-02, PNorm = 83.6620, GNorm = 0.5069, lr_0 = 2.0464e-04
Loss = 8.0926e-02, PNorm = 83.6675, GNorm = 0.4627, lr_0 = 2.0450e-04
Loss = 9.2884e-02, PNorm = 83.6742, GNorm = 0.9025, lr_0 = 2.0436e-04
Loss = 9.6031e-02, PNorm = 83.6780, GNorm = 0.8909, lr_0 = 2.0422e-04
Loss = 8.4461e-02, PNorm = 83.6809, GNorm = 0.6377, lr_0 = 2.0408e-04
Loss = 8.8276e-02, PNorm = 83.6817, GNorm = 0.5779, lr_0 = 2.0394e-04
Loss = 8.7835e-02, PNorm = 83.6849, GNorm = 0.5075, lr_0 = 2.0380e-04
Loss = 8.8484e-02, PNorm = 83.6894, GNorm = 0.5542, lr_0 = 2.0366e-04
Loss = 9.6429e-02, PNorm = 83.6952, GNorm = 0.6784, lr_0 = 2.0352e-04
Loss = 9.9263e-02, PNorm = 83.7005, GNorm = 0.7642, lr_0 = 2.0338e-04
Loss = 8.3791e-02, PNorm = 83.7037, GNorm = 0.6287, lr_0 = 2.0324e-04
Loss = 9.0923e-02, PNorm = 83.7061, GNorm = 0.5641, lr_0 = 2.0310e-04
Loss = 9.3622e-02, PNorm = 83.7079, GNorm = 0.6743, lr_0 = 2.0296e-04
Loss = 8.4164e-02, PNorm = 83.7107, GNorm = 0.4807, lr_0 = 2.0282e-04
Loss = 8.9974e-02, PNorm = 83.7126, GNorm = 0.6510, lr_0 = 2.0268e-04
Loss = 9.1901e-02, PNorm = 83.7169, GNorm = 0.6812, lr_0 = 2.0254e-04
Loss = 8.8092e-02, PNorm = 83.7232, GNorm = 0.5917, lr_0 = 2.0240e-04
Loss = 8.5787e-02, PNorm = 83.7257, GNorm = 0.6320, lr_0 = 2.0227e-04
Loss = 9.5296e-02, PNorm = 83.7258, GNorm = 0.8079, lr_0 = 2.0213e-04
Loss = 8.3425e-02, PNorm = 83.7278, GNorm = 0.5620, lr_0 = 2.0199e-04
Loss = 7.9501e-02, PNorm = 83.7298, GNorm = 0.6066, lr_0 = 2.0185e-04
Loss = 8.0684e-02, PNorm = 83.7341, GNorm = 0.6922, lr_0 = 2.0171e-04
Loss = 9.8500e-02, PNorm = 83.7382, GNorm = 0.5963, lr_0 = 2.0157e-04
Loss = 9.6078e-02, PNorm = 83.7423, GNorm = 0.5047, lr_0 = 2.0144e-04
Loss = 9.2660e-02, PNorm = 83.7471, GNorm = 0.7794, lr_0 = 2.0130e-04
Loss = 8.0379e-02, PNorm = 83.7506, GNorm = 0.4929, lr_0 = 2.0116e-04
Loss = 9.9492e-02, PNorm = 83.7543, GNorm = 0.6763, lr_0 = 2.0102e-04
Loss = 8.5688e-02, PNorm = 83.7566, GNorm = 0.5287, lr_0 = 2.0088e-04
Loss = 9.0711e-02, PNorm = 83.7584, GNorm = 0.5129, lr_0 = 2.0075e-04
Loss = 8.6638e-02, PNorm = 83.7622, GNorm = 0.9214, lr_0 = 2.0061e-04
Loss = 9.5612e-02, PNorm = 83.7669, GNorm = 0.6139, lr_0 = 2.0047e-04
Loss = 9.8741e-02, PNorm = 83.7714, GNorm = 0.6430, lr_0 = 2.0033e-04
Loss = 8.5747e-02, PNorm = 83.7765, GNorm = 0.6054, lr_0 = 2.0020e-04
Loss = 9.8024e-02, PNorm = 83.7812, GNorm = 0.7393, lr_0 = 2.0006e-04
Loss = 8.1511e-02, PNorm = 83.7845, GNorm = 0.4590, lr_0 = 1.9992e-04
Loss = 9.3068e-02, PNorm = 83.7900, GNorm = 0.5826, lr_0 = 1.9979e-04
Loss = 7.6512e-02, PNorm = 83.7928, GNorm = 0.5135, lr_0 = 1.9965e-04
Loss = 8.5089e-02, PNorm = 83.7930, GNorm = 0.5516, lr_0 = 1.9951e-04
Loss = 9.6955e-02, PNorm = 83.7938, GNorm = 0.5371, lr_0 = 1.9938e-04
Loss = 8.7286e-02, PNorm = 83.7974, GNorm = 0.5309, lr_0 = 1.9924e-04
Loss = 8.7282e-02, PNorm = 83.8035, GNorm = 0.5686, lr_0 = 1.9910e-04
Loss = 8.9686e-02, PNorm = 83.8063, GNorm = 0.9470, lr_0 = 1.9897e-04
Loss = 7.7364e-02, PNorm = 83.8108, GNorm = 0.8967, lr_0 = 1.9883e-04
Loss = 8.4335e-02, PNorm = 83.8110, GNorm = 0.6547, lr_0 = 1.9869e-04
Loss = 9.4400e-02, PNorm = 83.8154, GNorm = 0.5348, lr_0 = 1.9856e-04
Loss = 9.2296e-02, PNorm = 83.8207, GNorm = 0.7078, lr_0 = 1.9842e-04
Loss = 8.3193e-02, PNorm = 83.8256, GNorm = 0.4385, lr_0 = 1.9829e-04
Loss = 8.9165e-02, PNorm = 83.8300, GNorm = 0.4607, lr_0 = 1.9815e-04
Loss = 8.4694e-02, PNorm = 83.8333, GNorm = 0.5092, lr_0 = 1.9801e-04
Loss = 8.7990e-02, PNorm = 83.8367, GNorm = 0.5534, lr_0 = 1.9788e-04
Loss = 9.6421e-02, PNorm = 83.8390, GNorm = 0.7687, lr_0 = 1.9774e-04
Loss = 8.0321e-02, PNorm = 83.8437, GNorm = 0.5544, lr_0 = 1.9761e-04
Loss = 9.1190e-02, PNorm = 83.8479, GNorm = 0.5405, lr_0 = 1.9747e-04
Loss = 9.5906e-02, PNorm = 83.8530, GNorm = 0.5662, lr_0 = 1.9734e-04
Loss = 8.3245e-02, PNorm = 83.8562, GNorm = 0.7348, lr_0 = 1.9720e-04
Loss = 9.0902e-02, PNorm = 83.8584, GNorm = 0.9016, lr_0 = 1.9707e-04
Loss = 9.6854e-02, PNorm = 83.8620, GNorm = 0.5552, lr_0 = 1.9693e-04
Loss = 1.0312e-01, PNorm = 83.8664, GNorm = 0.7493, lr_0 = 1.9680e-04
Loss = 8.4129e-02, PNorm = 83.8692, GNorm = 0.6428, lr_0 = 1.9666e-04
Loss = 9.5441e-02, PNorm = 83.8724, GNorm = 0.5980, lr_0 = 1.9653e-04
Loss = 8.9445e-02, PNorm = 83.8764, GNorm = 0.8805, lr_0 = 1.9639e-04
Loss = 9.1898e-02, PNorm = 83.8807, GNorm = 0.7282, lr_0 = 1.9626e-04
Loss = 9.2859e-02, PNorm = 83.8827, GNorm = 0.6427, lr_0 = 1.9612e-04
Loss = 9.4752e-02, PNorm = 83.8868, GNorm = 0.7539, lr_0 = 1.9599e-04
Loss = 8.9929e-02, PNorm = 83.8906, GNorm = 0.7139, lr_0 = 1.9585e-04
Loss = 9.7010e-02, PNorm = 83.8962, GNorm = 0.6341, lr_0 = 1.9572e-04
Loss = 8.8200e-02, PNorm = 83.8995, GNorm = 0.5231, lr_0 = 1.9559e-04
Loss = 8.3429e-02, PNorm = 83.9013, GNorm = 0.7691, lr_0 = 1.9545e-04
Loss = 9.2337e-02, PNorm = 83.9033, GNorm = 0.6827, lr_0 = 1.9532e-04
Loss = 9.4874e-02, PNorm = 83.9079, GNorm = 0.5553, lr_0 = 1.9518e-04
Loss = 7.7283e-02, PNorm = 83.9128, GNorm = 0.6003, lr_0 = 1.9505e-04
Loss = 8.8386e-02, PNorm = 83.9188, GNorm = 0.6936, lr_0 = 1.9492e-04
Loss = 9.7449e-02, PNorm = 83.9212, GNorm = 0.6950, lr_0 = 1.9478e-04
Loss = 8.5909e-02, PNorm = 83.9237, GNorm = 0.5711, lr_0 = 1.9465e-04
Loss = 8.5575e-02, PNorm = 83.9237, GNorm = 0.6037, lr_0 = 1.9452e-04
Loss = 1.0543e-01, PNorm = 83.9243, GNorm = 0.5312, lr_0 = 1.9438e-04
Loss = 7.9966e-02, PNorm = 83.9275, GNorm = 0.6560, lr_0 = 1.9425e-04
Loss = 1.0571e-01, PNorm = 83.9290, GNorm = 0.7004, lr_0 = 1.9412e-04
Loss = 9.9125e-02, PNorm = 83.9332, GNorm = 0.7328, lr_0 = 1.9398e-04
Loss = 8.4633e-02, PNorm = 83.9346, GNorm = 0.6178, lr_0 = 1.9385e-04
Loss = 1.0255e-01, PNorm = 83.9367, GNorm = 0.6773, lr_0 = 1.9372e-04
Loss = 8.3473e-02, PNorm = 83.9410, GNorm = 0.6338, lr_0 = 1.9359e-04
Loss = 9.6519e-02, PNorm = 83.9453, GNorm = 0.6678, lr_0 = 1.9345e-04
Loss = 9.4971e-02, PNorm = 83.9485, GNorm = 0.7247, lr_0 = 1.9332e-04
Loss = 9.4441e-02, PNorm = 83.9518, GNorm = 0.5387, lr_0 = 1.9319e-04
Loss = 9.4519e-02, PNorm = 83.9560, GNorm = 0.5405, lr_0 = 1.9306e-04
Validation mae = 0.227261
Epoch 22
Loss = 9.5701e-02, PNorm = 83.9626, GNorm = 0.5480, lr_0 = 1.9292e-04
Loss = 8.4788e-02, PNorm = 83.9639, GNorm = 0.5516, lr_0 = 1.9279e-04
Loss = 8.8810e-02, PNorm = 83.9678, GNorm = 0.7091, lr_0 = 1.9266e-04
Loss = 1.1279e-01, PNorm = 83.9725, GNorm = 0.7161, lr_0 = 1.9253e-04
Loss = 7.1235e-02, PNorm = 83.9773, GNorm = 0.5632, lr_0 = 1.9240e-04
Loss = 8.8948e-02, PNorm = 83.9803, GNorm = 0.6438, lr_0 = 1.9226e-04
Loss = 8.4934e-02, PNorm = 83.9843, GNorm = 0.5440, lr_0 = 1.9213e-04
Loss = 8.0717e-02, PNorm = 83.9843, GNorm = 0.5741, lr_0 = 1.9200e-04
Loss = 8.2263e-02, PNorm = 83.9882, GNorm = 0.7512, lr_0 = 1.9187e-04
Loss = 7.6013e-02, PNorm = 83.9913, GNorm = 0.6860, lr_0 = 1.9174e-04
Loss = 8.7794e-02, PNorm = 83.9952, GNorm = 0.7603, lr_0 = 1.9161e-04
Loss = 8.3286e-02, PNorm = 83.9985, GNorm = 0.7492, lr_0 = 1.9148e-04
Loss = 8.9934e-02, PNorm = 84.0000, GNorm = 0.5976, lr_0 = 1.9134e-04
Loss = 7.4151e-02, PNorm = 84.0029, GNorm = 0.5443, lr_0 = 1.9121e-04
Loss = 8.6452e-02, PNorm = 84.0046, GNorm = 0.6216, lr_0 = 1.9108e-04
Loss = 7.9382e-02, PNorm = 84.0074, GNorm = 0.5016, lr_0 = 1.9095e-04
Loss = 8.3281e-02, PNorm = 84.0081, GNorm = 0.5114, lr_0 = 1.9082e-04
Loss = 8.6471e-02, PNorm = 84.0107, GNorm = 0.8821, lr_0 = 1.9069e-04
Loss = 8.9412e-02, PNorm = 84.0154, GNorm = 0.6971, lr_0 = 1.9056e-04
Loss = 8.0341e-02, PNorm = 84.0197, GNorm = 0.8626, lr_0 = 1.9043e-04
Loss = 7.6702e-02, PNorm = 84.0229, GNorm = 0.6478, lr_0 = 1.9030e-04
Loss = 8.3659e-02, PNorm = 84.0276, GNorm = 0.8453, lr_0 = 1.9017e-04
Loss = 9.1172e-02, PNorm = 84.0295, GNorm = 0.6960, lr_0 = 1.9004e-04
Loss = 8.9913e-02, PNorm = 84.0333, GNorm = 0.9557, lr_0 = 1.8991e-04
Loss = 9.0660e-02, PNorm = 84.0399, GNorm = 0.6232, lr_0 = 1.8978e-04
Loss = 7.8625e-02, PNorm = 84.0453, GNorm = 0.5507, lr_0 = 1.8965e-04
Loss = 7.9291e-02, PNorm = 84.0498, GNorm = 0.6264, lr_0 = 1.8952e-04
Loss = 8.2383e-02, PNorm = 84.0533, GNorm = 0.8431, lr_0 = 1.8939e-04
Loss = 8.5999e-02, PNorm = 84.0567, GNorm = 0.6489, lr_0 = 1.8926e-04
Loss = 7.9419e-02, PNorm = 84.0598, GNorm = 0.5771, lr_0 = 1.8913e-04
Loss = 8.6508e-02, PNorm = 84.0631, GNorm = 0.6100, lr_0 = 1.8900e-04
Loss = 8.6661e-02, PNorm = 84.0644, GNorm = 0.6023, lr_0 = 1.8887e-04
Loss = 8.5435e-02, PNorm = 84.0691, GNorm = 0.9305, lr_0 = 1.8874e-04
Loss = 7.7539e-02, PNorm = 84.0732, GNorm = 0.6070, lr_0 = 1.8861e-04
Loss = 7.7329e-02, PNorm = 84.0747, GNorm = 0.6048, lr_0 = 1.8848e-04
Loss = 8.5050e-02, PNorm = 84.0769, GNorm = 0.6026, lr_0 = 1.8835e-04
Loss = 8.4499e-02, PNorm = 84.0841, GNorm = 0.5597, lr_0 = 1.8822e-04
Loss = 8.4278e-02, PNorm = 84.0895, GNorm = 0.6246, lr_0 = 1.8809e-04
Loss = 8.6177e-02, PNorm = 84.0920, GNorm = 0.4939, lr_0 = 1.8797e-04
Loss = 8.5062e-02, PNorm = 84.0960, GNorm = 0.5571, lr_0 = 1.8784e-04
Loss = 7.5561e-02, PNorm = 84.1007, GNorm = 0.5245, lr_0 = 1.8771e-04
Loss = 8.7581e-02, PNorm = 84.1035, GNorm = 0.8680, lr_0 = 1.8758e-04
Loss = 8.3569e-02, PNorm = 84.1061, GNorm = 0.8127, lr_0 = 1.8745e-04
Loss = 9.2242e-02, PNorm = 84.1088, GNorm = 0.8949, lr_0 = 1.8732e-04
Loss = 8.3236e-02, PNorm = 84.1118, GNorm = 0.7216, lr_0 = 1.8719e-04
Loss = 8.2061e-02, PNorm = 84.1154, GNorm = 0.7841, lr_0 = 1.8707e-04
Loss = 8.6986e-02, PNorm = 84.1171, GNorm = 0.8624, lr_0 = 1.8694e-04
Loss = 9.5895e-02, PNorm = 84.1200, GNorm = 0.7266, lr_0 = 1.8681e-04
Loss = 8.0244e-02, PNorm = 84.1217, GNorm = 0.6562, lr_0 = 1.8668e-04
Loss = 9.2958e-02, PNorm = 84.1247, GNorm = 0.7377, lr_0 = 1.8655e-04
Loss = 9.3087e-02, PNorm = 84.1265, GNorm = 0.5804, lr_0 = 1.8643e-04
Loss = 9.6423e-02, PNorm = 84.1308, GNorm = 0.7177, lr_0 = 1.8630e-04
Loss = 9.7696e-02, PNorm = 84.1350, GNorm = 0.8007, lr_0 = 1.8617e-04
Loss = 9.2322e-02, PNorm = 84.1385, GNorm = 0.6313, lr_0 = 1.8604e-04
Loss = 8.6608e-02, PNorm = 84.1425, GNorm = 0.6496, lr_0 = 1.8592e-04
Loss = 8.1358e-02, PNorm = 84.1469, GNorm = 0.8408, lr_0 = 1.8579e-04
Loss = 8.9378e-02, PNorm = 84.1503, GNorm = 1.1590, lr_0 = 1.8566e-04
Loss = 8.0835e-02, PNorm = 84.1523, GNorm = 0.5142, lr_0 = 1.8553e-04
Loss = 8.3416e-02, PNorm = 84.1561, GNorm = 0.5886, lr_0 = 1.8541e-04
Loss = 9.2960e-02, PNorm = 84.1610, GNorm = 0.4926, lr_0 = 1.8528e-04
Loss = 8.1238e-02, PNorm = 84.1643, GNorm = 0.5841, lr_0 = 1.8515e-04
Loss = 9.1516e-02, PNorm = 84.1666, GNorm = 0.4432, lr_0 = 1.8503e-04
Loss = 7.8014e-02, PNorm = 84.1702, GNorm = 0.7190, lr_0 = 1.8490e-04
Loss = 7.4303e-02, PNorm = 84.1737, GNorm = 0.5277, lr_0 = 1.8477e-04
Loss = 8.2629e-02, PNorm = 84.1762, GNorm = 0.6564, lr_0 = 1.8465e-04
Loss = 8.6646e-02, PNorm = 84.1786, GNorm = 0.6050, lr_0 = 1.8452e-04
Loss = 9.3484e-02, PNorm = 84.1803, GNorm = 0.5905, lr_0 = 1.8439e-04
Loss = 8.1773e-02, PNorm = 84.1839, GNorm = 0.7795, lr_0 = 1.8427e-04
Loss = 8.2198e-02, PNorm = 84.1882, GNorm = 0.7259, lr_0 = 1.8414e-04
Loss = 9.0669e-02, PNorm = 84.1883, GNorm = 0.6456, lr_0 = 1.8401e-04
Loss = 8.7753e-02, PNorm = 84.1912, GNorm = 0.7662, lr_0 = 1.8389e-04
Loss = 9.6237e-02, PNorm = 84.1955, GNorm = 0.7470, lr_0 = 1.8376e-04
Loss = 8.7970e-02, PNorm = 84.1998, GNorm = 0.5193, lr_0 = 1.8364e-04
Loss = 8.5784e-02, PNorm = 84.2038, GNorm = 0.5936, lr_0 = 1.8351e-04
Loss = 8.8334e-02, PNorm = 84.2077, GNorm = 0.6338, lr_0 = 1.8338e-04
Loss = 8.9090e-02, PNorm = 84.2093, GNorm = 0.7562, lr_0 = 1.8326e-04
Loss = 8.3625e-02, PNorm = 84.2102, GNorm = 0.8355, lr_0 = 1.8313e-04
Loss = 9.2832e-02, PNorm = 84.2117, GNorm = 0.5742, lr_0 = 1.8301e-04
Loss = 9.6251e-02, PNorm = 84.2141, GNorm = 0.6229, lr_0 = 1.8288e-04
Loss = 7.8212e-02, PNorm = 84.2166, GNorm = 0.6413, lr_0 = 1.8276e-04
Loss = 9.8982e-02, PNorm = 84.2201, GNorm = 0.5907, lr_0 = 1.8263e-04
Loss = 7.2403e-02, PNorm = 84.2250, GNorm = 0.5631, lr_0 = 1.8251e-04
Loss = 8.2472e-02, PNorm = 84.2293, GNorm = 0.5397, lr_0 = 1.8238e-04
Loss = 8.6373e-02, PNorm = 84.2326, GNorm = 0.7446, lr_0 = 1.8226e-04
Loss = 8.9144e-02, PNorm = 84.2355, GNorm = 0.6371, lr_0 = 1.8213e-04
Loss = 7.8899e-02, PNorm = 84.2410, GNorm = 0.6780, lr_0 = 1.8201e-04
Loss = 9.5028e-02, PNorm = 84.2453, GNorm = 0.7191, lr_0 = 1.8188e-04
Loss = 8.0265e-02, PNorm = 84.2460, GNorm = 0.5466, lr_0 = 1.8176e-04
Loss = 7.5660e-02, PNorm = 84.2497, GNorm = 0.5199, lr_0 = 1.8163e-04
Loss = 8.1434e-02, PNorm = 84.2537, GNorm = 0.8752, lr_0 = 1.8151e-04
Loss = 8.6856e-02, PNorm = 84.2561, GNorm = 0.6167, lr_0 = 1.8138e-04
Loss = 8.4292e-02, PNorm = 84.2590, GNorm = 0.7490, lr_0 = 1.8126e-04
Loss = 8.4050e-02, PNorm = 84.2606, GNorm = 0.6658, lr_0 = 1.8114e-04
Loss = 8.4463e-02, PNorm = 84.2636, GNorm = 0.5667, lr_0 = 1.8101e-04
Loss = 8.1331e-02, PNorm = 84.2660, GNorm = 0.5407, lr_0 = 1.8089e-04
Loss = 9.6613e-02, PNorm = 84.2681, GNorm = 0.8035, lr_0 = 1.8076e-04
Loss = 9.2708e-02, PNorm = 84.2690, GNorm = 0.5721, lr_0 = 1.8064e-04
Loss = 1.1043e-01, PNorm = 84.2705, GNorm = 0.7427, lr_0 = 1.8052e-04
Loss = 8.5949e-02, PNorm = 84.2717, GNorm = 0.5932, lr_0 = 1.8039e-04
Loss = 8.8157e-02, PNorm = 84.2732, GNorm = 0.6721, lr_0 = 1.8027e-04
Loss = 8.5368e-02, PNorm = 84.2749, GNorm = 0.6157, lr_0 = 1.8015e-04
Loss = 9.8710e-02, PNorm = 84.2777, GNorm = 0.6083, lr_0 = 1.8002e-04
Loss = 9.0976e-02, PNorm = 84.2789, GNorm = 0.7950, lr_0 = 1.7990e-04
Loss = 8.5824e-02, PNorm = 84.2808, GNorm = 0.6144, lr_0 = 1.7978e-04
Loss = 8.4624e-02, PNorm = 84.2844, GNorm = 0.6866, lr_0 = 1.7965e-04
Loss = 8.2733e-02, PNorm = 84.2874, GNorm = 0.5128, lr_0 = 1.7953e-04
Loss = 7.3475e-02, PNorm = 84.2891, GNorm = 0.4182, lr_0 = 1.7941e-04
Loss = 8.6789e-02, PNorm = 84.2924, GNorm = 0.5886, lr_0 = 1.7928e-04
Loss = 9.4555e-02, PNorm = 84.2956, GNorm = 0.5451, lr_0 = 1.7916e-04
Loss = 7.7412e-02, PNorm = 84.2990, GNorm = 0.4673, lr_0 = 1.7904e-04
Loss = 9.8314e-02, PNorm = 84.3011, GNorm = 0.7060, lr_0 = 1.7892e-04
Loss = 8.3544e-02, PNorm = 84.3049, GNorm = 0.6340, lr_0 = 1.7879e-04
Loss = 9.0451e-02, PNorm = 84.3113, GNorm = 0.7894, lr_0 = 1.7867e-04
Loss = 8.3412e-02, PNorm = 84.3153, GNorm = 0.6141, lr_0 = 1.7855e-04
Loss = 8.5904e-02, PNorm = 84.3155, GNorm = 0.5777, lr_0 = 1.7843e-04
Loss = 9.8003e-02, PNorm = 84.3188, GNorm = 0.6870, lr_0 = 1.7830e-04
Loss = 8.3556e-02, PNorm = 84.3225, GNorm = 0.5701, lr_0 = 1.7818e-04
Loss = 8.7982e-02, PNorm = 84.3264, GNorm = 0.4927, lr_0 = 1.7806e-04
Loss = 9.5103e-02, PNorm = 84.3311, GNorm = 0.6503, lr_0 = 1.7794e-04
Loss = 8.1506e-02, PNorm = 84.3331, GNorm = 0.5505, lr_0 = 1.7782e-04
Validation mae = 0.227219
Epoch 23
Loss = 9.7258e-02, PNorm = 84.3379, GNorm = 0.6790, lr_0 = 1.7769e-04
Loss = 8.0301e-02, PNorm = 84.3407, GNorm = 0.5566, lr_0 = 1.7757e-04
Loss = 8.2557e-02, PNorm = 84.3431, GNorm = 0.7634, lr_0 = 1.7745e-04
Loss = 7.0931e-02, PNorm = 84.3466, GNorm = 0.4990, lr_0 = 1.7733e-04
Loss = 8.2384e-02, PNorm = 84.3505, GNorm = 1.0770, lr_0 = 1.7721e-04
Loss = 7.4372e-02, PNorm = 84.3524, GNorm = 0.9208, lr_0 = 1.7709e-04
Loss = 8.5434e-02, PNorm = 84.3563, GNorm = 0.6159, lr_0 = 1.7696e-04
Loss = 8.4634e-02, PNorm = 84.3611, GNorm = 0.5032, lr_0 = 1.7684e-04
Loss = 8.0665e-02, PNorm = 84.3637, GNorm = 0.6210, lr_0 = 1.7672e-04
Loss = 8.2979e-02, PNorm = 84.3679, GNorm = 0.7889, lr_0 = 1.7660e-04
Loss = 7.8887e-02, PNorm = 84.3691, GNorm = 0.7254, lr_0 = 1.7648e-04
Loss = 7.7735e-02, PNorm = 84.3695, GNorm = 0.6794, lr_0 = 1.7636e-04
Loss = 7.9754e-02, PNorm = 84.3717, GNorm = 0.5735, lr_0 = 1.7624e-04
Loss = 7.5517e-02, PNorm = 84.3767, GNorm = 0.7368, lr_0 = 1.7612e-04
Loss = 8.7807e-02, PNorm = 84.3799, GNorm = 0.4887, lr_0 = 1.7600e-04
Loss = 6.5125e-02, PNorm = 84.3825, GNorm = 0.6913, lr_0 = 1.7588e-04
Loss = 8.9722e-02, PNorm = 84.3835, GNorm = 0.5031, lr_0 = 1.7576e-04
Loss = 8.4412e-02, PNorm = 84.3846, GNorm = 0.7337, lr_0 = 1.7564e-04
Loss = 8.2780e-02, PNorm = 84.3880, GNorm = 0.6915, lr_0 = 1.7552e-04
Loss = 9.7244e-02, PNorm = 84.3925, GNorm = 0.7397, lr_0 = 1.7540e-04
Loss = 8.6544e-02, PNorm = 84.3930, GNorm = 0.6568, lr_0 = 1.7528e-04
Loss = 8.0058e-02, PNorm = 84.3959, GNorm = 0.5985, lr_0 = 1.7516e-04
Loss = 8.1551e-02, PNorm = 84.3983, GNorm = 0.5056, lr_0 = 1.7504e-04
Loss = 7.6713e-02, PNorm = 84.4019, GNorm = 0.7144, lr_0 = 1.7492e-04
Loss = 8.3546e-02, PNorm = 84.4059, GNorm = 0.5948, lr_0 = 1.7480e-04
Loss = 8.2895e-02, PNorm = 84.4090, GNorm = 0.6755, lr_0 = 1.7468e-04
Loss = 7.7057e-02, PNorm = 84.4128, GNorm = 0.5639, lr_0 = 1.7456e-04
Loss = 7.1903e-02, PNorm = 84.4175, GNorm = 0.4535, lr_0 = 1.7444e-04
Loss = 7.2128e-02, PNorm = 84.4197, GNorm = 0.9692, lr_0 = 1.7432e-04
Loss = 7.4782e-02, PNorm = 84.4210, GNorm = 0.6127, lr_0 = 1.7420e-04
Loss = 1.0074e-01, PNorm = 84.4237, GNorm = 0.8566, lr_0 = 1.7408e-04
Loss = 7.1952e-02, PNorm = 84.4266, GNorm = 0.7167, lr_0 = 1.7396e-04
Loss = 8.1628e-02, PNorm = 84.4278, GNorm = 0.5381, lr_0 = 1.7384e-04
Loss = 8.8008e-02, PNorm = 84.4316, GNorm = 0.5892, lr_0 = 1.7372e-04
Loss = 8.3233e-02, PNorm = 84.4363, GNorm = 0.8332, lr_0 = 1.7360e-04
Loss = 8.3244e-02, PNorm = 84.4409, GNorm = 0.8518, lr_0 = 1.7348e-04
Loss = 1.0167e-01, PNorm = 84.4407, GNorm = 1.1370, lr_0 = 1.7336e-04
Loss = 8.7399e-02, PNorm = 84.4427, GNorm = 0.7606, lr_0 = 1.7325e-04
Loss = 8.2256e-02, PNorm = 84.4463, GNorm = 0.6082, lr_0 = 1.7313e-04
Loss = 8.6689e-02, PNorm = 84.4482, GNorm = 0.9263, lr_0 = 1.7301e-04
Loss = 8.0994e-02, PNorm = 84.4507, GNorm = 0.6263, lr_0 = 1.7289e-04
Loss = 7.9297e-02, PNorm = 84.4537, GNorm = 0.6761, lr_0 = 1.7277e-04
Loss = 8.8772e-02, PNorm = 84.4534, GNorm = 0.9157, lr_0 = 1.7265e-04
Loss = 7.7819e-02, PNorm = 84.4560, GNorm = 0.7303, lr_0 = 1.7253e-04
Loss = 8.8636e-02, PNorm = 84.4595, GNorm = 0.5734, lr_0 = 1.7242e-04
Loss = 8.0096e-02, PNorm = 84.4625, GNorm = 0.5815, lr_0 = 1.7230e-04
Loss = 8.1803e-02, PNorm = 84.4650, GNorm = 0.5971, lr_0 = 1.7218e-04
Loss = 8.3753e-02, PNorm = 84.4671, GNorm = 0.4982, lr_0 = 1.7206e-04
Loss = 9.2780e-02, PNorm = 84.4719, GNorm = 0.7075, lr_0 = 1.7194e-04
Loss = 9.2310e-02, PNorm = 84.4776, GNorm = 0.6961, lr_0 = 1.7183e-04
Loss = 7.5180e-02, PNorm = 84.4814, GNorm = 0.6390, lr_0 = 1.7171e-04
Loss = 8.2306e-02, PNorm = 84.4868, GNorm = 0.5221, lr_0 = 1.7159e-04
Loss = 8.4432e-02, PNorm = 84.4898, GNorm = 0.6266, lr_0 = 1.7147e-04
Loss = 9.6374e-02, PNorm = 84.4927, GNorm = 0.8037, lr_0 = 1.7136e-04
Loss = 7.9426e-02, PNorm = 84.4950, GNorm = 0.5621, lr_0 = 1.7124e-04
Loss = 9.0438e-02, PNorm = 84.4977, GNorm = 0.7626, lr_0 = 1.7112e-04
Loss = 8.8505e-02, PNorm = 84.5000, GNorm = 0.7121, lr_0 = 1.7100e-04
Loss = 8.9760e-02, PNorm = 84.5005, GNorm = 0.5284, lr_0 = 1.7089e-04
Loss = 8.2938e-02, PNorm = 84.5018, GNorm = 0.7452, lr_0 = 1.7077e-04
Loss = 8.4385e-02, PNorm = 84.5026, GNorm = 0.5519, lr_0 = 1.7065e-04
Loss = 1.0592e-01, PNorm = 84.5048, GNorm = 0.8806, lr_0 = 1.7054e-04
Loss = 8.1347e-02, PNorm = 84.5092, GNorm = 0.7235, lr_0 = 1.7042e-04
Loss = 8.2966e-02, PNorm = 84.5116, GNorm = 0.5278, lr_0 = 1.7030e-04
Loss = 8.4745e-02, PNorm = 84.5154, GNorm = 0.5706, lr_0 = 1.7019e-04
Loss = 9.3903e-02, PNorm = 84.5192, GNorm = 0.4738, lr_0 = 1.7007e-04
Loss = 8.0767e-02, PNorm = 84.5233, GNorm = 0.6122, lr_0 = 1.6995e-04
Loss = 8.3842e-02, PNorm = 84.5283, GNorm = 0.6240, lr_0 = 1.6984e-04
Loss = 7.6228e-02, PNorm = 84.5301, GNorm = 0.4804, lr_0 = 1.6972e-04
Loss = 8.1606e-02, PNorm = 84.5315, GNorm = 0.5814, lr_0 = 1.6960e-04
Loss = 7.4309e-02, PNorm = 84.5329, GNorm = 0.6233, lr_0 = 1.6949e-04
Loss = 7.4300e-02, PNorm = 84.5360, GNorm = 0.5499, lr_0 = 1.6937e-04
Loss = 8.2779e-02, PNorm = 84.5370, GNorm = 0.6605, lr_0 = 1.6926e-04
Loss = 9.1797e-02, PNorm = 84.5383, GNorm = 0.6920, lr_0 = 1.6914e-04
Loss = 8.2714e-02, PNorm = 84.5418, GNorm = 0.6430, lr_0 = 1.6902e-04
Loss = 8.9058e-02, PNorm = 84.5448, GNorm = 0.6241, lr_0 = 1.6891e-04
Loss = 9.1658e-02, PNorm = 84.5474, GNorm = 0.5206, lr_0 = 1.6879e-04
Loss = 8.6426e-02, PNorm = 84.5495, GNorm = 0.7353, lr_0 = 1.6868e-04
Loss = 7.7674e-02, PNorm = 84.5510, GNorm = 0.5646, lr_0 = 1.6856e-04
Loss = 8.6419e-02, PNorm = 84.5523, GNorm = 0.5724, lr_0 = 1.6845e-04
Loss = 8.2375e-02, PNorm = 84.5555, GNorm = 0.5949, lr_0 = 1.6833e-04
Loss = 8.8353e-02, PNorm = 84.5611, GNorm = 0.7300, lr_0 = 1.6821e-04
Loss = 8.6081e-02, PNorm = 84.5639, GNorm = 0.4655, lr_0 = 1.6810e-04
Loss = 7.7588e-02, PNorm = 84.5658, GNorm = 0.6865, lr_0 = 1.6798e-04
Loss = 7.9123e-02, PNorm = 84.5672, GNorm = 0.6902, lr_0 = 1.6787e-04
Loss = 8.1054e-02, PNorm = 84.5703, GNorm = 0.6308, lr_0 = 1.6775e-04
Loss = 9.5993e-02, PNorm = 84.5737, GNorm = 0.6640, lr_0 = 1.6764e-04
Loss = 8.1892e-02, PNorm = 84.5779, GNorm = 0.6459, lr_0 = 1.6752e-04
Loss = 8.8367e-02, PNorm = 84.5808, GNorm = 0.5282, lr_0 = 1.6741e-04
Loss = 7.5608e-02, PNorm = 84.5838, GNorm = 0.5458, lr_0 = 1.6729e-04
Loss = 7.9360e-02, PNorm = 84.5861, GNorm = 0.8009, lr_0 = 1.6718e-04
Loss = 8.7550e-02, PNorm = 84.5862, GNorm = 0.6944, lr_0 = 1.6707e-04
Loss = 1.0551e-01, PNorm = 84.5889, GNorm = 0.6454, lr_0 = 1.6695e-04
Loss = 8.4781e-02, PNorm = 84.5914, GNorm = 0.4660, lr_0 = 1.6684e-04
Loss = 8.9960e-02, PNorm = 84.5923, GNorm = 0.6032, lr_0 = 1.6672e-04
Loss = 8.4446e-02, PNorm = 84.5938, GNorm = 0.4990, lr_0 = 1.6661e-04
Loss = 9.6745e-02, PNorm = 84.5985, GNorm = 0.7819, lr_0 = 1.6649e-04
Loss = 9.8337e-02, PNorm = 84.6012, GNorm = 0.6540, lr_0 = 1.6638e-04
Loss = 9.1343e-02, PNorm = 84.6058, GNorm = 0.8218, lr_0 = 1.6627e-04
Loss = 7.7883e-02, PNorm = 84.6114, GNorm = 0.7609, lr_0 = 1.6615e-04
Loss = 7.9691e-02, PNorm = 84.6123, GNorm = 0.4899, lr_0 = 1.6604e-04
Loss = 8.8283e-02, PNorm = 84.6130, GNorm = 0.7134, lr_0 = 1.6592e-04
Loss = 7.9140e-02, PNorm = 84.6162, GNorm = 0.5628, lr_0 = 1.6581e-04
Loss = 8.5901e-02, PNorm = 84.6173, GNorm = 0.5016, lr_0 = 1.6570e-04
Loss = 8.4827e-02, PNorm = 84.6204, GNorm = 0.7990, lr_0 = 1.6558e-04
Loss = 7.5525e-02, PNorm = 84.6247, GNorm = 0.8036, lr_0 = 1.6547e-04
Loss = 9.3491e-02, PNorm = 84.6281, GNorm = 0.6307, lr_0 = 1.6536e-04
Loss = 7.8820e-02, PNorm = 84.6310, GNorm = 0.7525, lr_0 = 1.6524e-04
Loss = 8.2109e-02, PNorm = 84.6335, GNorm = 0.6224, lr_0 = 1.6513e-04
Loss = 7.6750e-02, PNorm = 84.6356, GNorm = 0.8494, lr_0 = 1.6502e-04
Loss = 8.6270e-02, PNorm = 84.6373, GNorm = 0.5352, lr_0 = 1.6490e-04
Loss = 7.8259e-02, PNorm = 84.6373, GNorm = 0.5275, lr_0 = 1.6479e-04
Loss = 9.0998e-02, PNorm = 84.6417, GNorm = 0.6542, lr_0 = 1.6468e-04
Loss = 9.5439e-02, PNorm = 84.6454, GNorm = 0.8805, lr_0 = 1.6457e-04
Loss = 7.7120e-02, PNorm = 84.6462, GNorm = 0.6698, lr_0 = 1.6445e-04
Loss = 7.9588e-02, PNorm = 84.6489, GNorm = 0.7706, lr_0 = 1.6434e-04
Loss = 8.6464e-02, PNorm = 84.6519, GNorm = 0.7524, lr_0 = 1.6423e-04
Loss = 8.3181e-02, PNorm = 84.6542, GNorm = 0.5480, lr_0 = 1.6412e-04
Loss = 9.2340e-02, PNorm = 84.6566, GNorm = 0.7167, lr_0 = 1.6400e-04
Loss = 8.3222e-02, PNorm = 84.6597, GNorm = 0.6091, lr_0 = 1.6389e-04
Loss = 8.1655e-02, PNorm = 84.6637, GNorm = 0.8993, lr_0 = 1.6378e-04
Validation mae = 0.228720
Epoch 24
Loss = 7.8410e-02, PNorm = 84.6662, GNorm = 0.6969, lr_0 = 1.6367e-04
Loss = 7.8029e-02, PNorm = 84.6682, GNorm = 0.7280, lr_0 = 1.6355e-04
Loss = 8.8073e-02, PNorm = 84.6715, GNorm = 0.5283, lr_0 = 1.6344e-04
Loss = 7.4995e-02, PNorm = 84.6748, GNorm = 0.5320, lr_0 = 1.6333e-04
Loss = 9.2285e-02, PNorm = 84.6771, GNorm = 0.4790, lr_0 = 1.6322e-04
Loss = 8.4484e-02, PNorm = 84.6794, GNorm = 0.5381, lr_0 = 1.6311e-04
Loss = 8.3749e-02, PNorm = 84.6818, GNorm = 0.5783, lr_0 = 1.6299e-04
Loss = 8.3332e-02, PNorm = 84.6820, GNorm = 0.7580, lr_0 = 1.6288e-04
Loss = 7.8485e-02, PNorm = 84.6854, GNorm = 0.6062, lr_0 = 1.6277e-04
Loss = 7.6247e-02, PNorm = 84.6880, GNorm = 0.5351, lr_0 = 1.6266e-04
Loss = 7.7268e-02, PNorm = 84.6896, GNorm = 0.6099, lr_0 = 1.6255e-04
Loss = 8.3823e-02, PNorm = 84.6933, GNorm = 0.7140, lr_0 = 1.6244e-04
Loss = 8.5846e-02, PNorm = 84.6978, GNorm = 0.6687, lr_0 = 1.6233e-04
Loss = 7.7689e-02, PNorm = 84.7005, GNorm = 0.9383, lr_0 = 1.6221e-04
Loss = 6.9885e-02, PNorm = 84.7011, GNorm = 0.6630, lr_0 = 1.6210e-04
Loss = 6.9310e-02, PNorm = 84.7045, GNorm = 0.4710, lr_0 = 1.6199e-04
Loss = 8.2075e-02, PNorm = 84.7062, GNorm = 0.6690, lr_0 = 1.6188e-04
Loss = 8.6722e-02, PNorm = 84.7081, GNorm = 0.6214, lr_0 = 1.6177e-04
Loss = 7.9175e-02, PNorm = 84.7138, GNorm = 0.7275, lr_0 = 1.6166e-04
Loss = 9.1778e-02, PNorm = 84.7160, GNorm = 1.0424, lr_0 = 1.6155e-04
Loss = 7.6355e-02, PNorm = 84.7184, GNorm = 0.7394, lr_0 = 1.6144e-04
Loss = 7.6541e-02, PNorm = 84.7212, GNorm = 0.5974, lr_0 = 1.6133e-04
Loss = 7.7944e-02, PNorm = 84.7208, GNorm = 0.6102, lr_0 = 1.6122e-04
Loss = 8.0045e-02, PNorm = 84.7214, GNorm = 0.6964, lr_0 = 1.6111e-04
Loss = 7.8210e-02, PNorm = 84.7244, GNorm = 0.6250, lr_0 = 1.6100e-04
Loss = 9.2007e-02, PNorm = 84.7261, GNorm = 0.5145, lr_0 = 1.6089e-04
Loss = 8.8061e-02, PNorm = 84.7278, GNorm = 0.6009, lr_0 = 1.6078e-04
Loss = 7.9331e-02, PNorm = 84.7301, GNorm = 0.6602, lr_0 = 1.6067e-04
Loss = 7.0532e-02, PNorm = 84.7327, GNorm = 0.4771, lr_0 = 1.6056e-04
Loss = 6.2439e-02, PNorm = 84.7345, GNorm = 0.5673, lr_0 = 1.6045e-04
Loss = 7.8146e-02, PNorm = 84.7386, GNorm = 0.9186, lr_0 = 1.6034e-04
Loss = 8.6202e-02, PNorm = 84.7420, GNorm = 0.5779, lr_0 = 1.6023e-04
Loss = 7.8944e-02, PNorm = 84.7434, GNorm = 0.6622, lr_0 = 1.6012e-04
Loss = 8.6286e-02, PNorm = 84.7461, GNorm = 0.9106, lr_0 = 1.6001e-04
Loss = 7.8929e-02, PNorm = 84.7476, GNorm = 0.6489, lr_0 = 1.5990e-04
Loss = 7.5010e-02, PNorm = 84.7486, GNorm = 0.5288, lr_0 = 1.5979e-04
Loss = 7.1048e-02, PNorm = 84.7510, GNorm = 0.4032, lr_0 = 1.5968e-04
Loss = 8.9593e-02, PNorm = 84.7551, GNorm = 0.6659, lr_0 = 1.5957e-04
Loss = 8.5778e-02, PNorm = 84.7563, GNorm = 0.6547, lr_0 = 1.5946e-04
Loss = 8.2095e-02, PNorm = 84.7585, GNorm = 0.6646, lr_0 = 1.5935e-04
Loss = 8.4624e-02, PNorm = 84.7615, GNorm = 0.7108, lr_0 = 1.5924e-04
Loss = 7.5923e-02, PNorm = 84.7665, GNorm = 0.5219, lr_0 = 1.5913e-04
Loss = 8.3766e-02, PNorm = 84.7702, GNorm = 0.6012, lr_0 = 1.5902e-04
Loss = 7.4333e-02, PNorm = 84.7713, GNorm = 0.6822, lr_0 = 1.5891e-04
Loss = 7.1807e-02, PNorm = 84.7740, GNorm = 0.6153, lr_0 = 1.5880e-04
Loss = 7.6816e-02, PNorm = 84.7780, GNorm = 0.6184, lr_0 = 1.5870e-04
Loss = 8.4483e-02, PNorm = 84.7787, GNorm = 0.5547, lr_0 = 1.5859e-04
Loss = 8.7339e-02, PNorm = 84.7796, GNorm = 0.9092, lr_0 = 1.5848e-04
Loss = 8.0259e-02, PNorm = 84.7800, GNorm = 0.4681, lr_0 = 1.5837e-04
Loss = 9.9153e-02, PNorm = 84.7845, GNorm = 0.6188, lr_0 = 1.5826e-04
Loss = 7.9341e-02, PNorm = 84.7892, GNorm = 0.7477, lr_0 = 1.5815e-04
Loss = 7.7795e-02, PNorm = 84.7922, GNorm = 0.7193, lr_0 = 1.5804e-04
Loss = 7.4998e-02, PNorm = 84.7935, GNorm = 0.7923, lr_0 = 1.5794e-04
Loss = 7.6329e-02, PNorm = 84.7954, GNorm = 0.6634, lr_0 = 1.5783e-04
Loss = 8.9550e-02, PNorm = 84.7979, GNorm = 0.5015, lr_0 = 1.5772e-04
Loss = 7.5822e-02, PNorm = 84.7999, GNorm = 0.7339, lr_0 = 1.5761e-04
Loss = 8.2617e-02, PNorm = 84.8004, GNorm = 0.5746, lr_0 = 1.5750e-04
Loss = 8.8831e-02, PNorm = 84.8039, GNorm = 0.7488, lr_0 = 1.5740e-04
Loss = 7.6597e-02, PNorm = 84.8077, GNorm = 0.6793, lr_0 = 1.5729e-04
Loss = 7.5200e-02, PNorm = 84.8103, GNorm = 0.7429, lr_0 = 1.5718e-04
Loss = 8.0919e-02, PNorm = 84.8123, GNorm = 0.6588, lr_0 = 1.5707e-04
Loss = 8.9647e-02, PNorm = 84.8141, GNorm = 0.9397, lr_0 = 1.5697e-04
Loss = 7.4091e-02, PNorm = 84.8172, GNorm = 0.4125, lr_0 = 1.5686e-04
Loss = 8.1249e-02, PNorm = 84.8198, GNorm = 0.5891, lr_0 = 1.5675e-04
Loss = 7.4560e-02, PNorm = 84.8211, GNorm = 0.7022, lr_0 = 1.5664e-04
Loss = 7.4558e-02, PNorm = 84.8219, GNorm = 0.5722, lr_0 = 1.5654e-04
Loss = 9.6039e-02, PNorm = 84.8257, GNorm = 0.6827, lr_0 = 1.5643e-04
Loss = 7.6464e-02, PNorm = 84.8313, GNorm = 0.6882, lr_0 = 1.5632e-04
Loss = 7.7079e-02, PNorm = 84.8333, GNorm = 0.5828, lr_0 = 1.5621e-04
Loss = 8.5635e-02, PNorm = 84.8324, GNorm = 0.7786, lr_0 = 1.5611e-04
Loss = 8.6913e-02, PNorm = 84.8334, GNorm = 0.5523, lr_0 = 1.5600e-04
Loss = 8.3169e-02, PNorm = 84.8383, GNorm = 0.7476, lr_0 = 1.5589e-04
Loss = 8.4229e-02, PNorm = 84.8399, GNorm = 0.6741, lr_0 = 1.5579e-04
Loss = 7.5067e-02, PNorm = 84.8402, GNorm = 0.5099, lr_0 = 1.5568e-04
Loss = 7.3298e-02, PNorm = 84.8412, GNorm = 0.7476, lr_0 = 1.5557e-04
Loss = 8.3837e-02, PNorm = 84.8439, GNorm = 0.6247, lr_0 = 1.5547e-04
Loss = 8.3740e-02, PNorm = 84.8464, GNorm = 0.6314, lr_0 = 1.5536e-04
Loss = 7.2902e-02, PNorm = 84.8493, GNorm = 0.5996, lr_0 = 1.5525e-04
Loss = 8.9672e-02, PNorm = 84.8537, GNorm = 0.5997, lr_0 = 1.5515e-04
Loss = 7.8891e-02, PNorm = 84.8571, GNorm = 0.5309, lr_0 = 1.5504e-04
Loss = 8.5783e-02, PNorm = 84.8591, GNorm = 0.7350, lr_0 = 1.5493e-04
Loss = 8.2669e-02, PNorm = 84.8615, GNorm = 0.7833, lr_0 = 1.5483e-04
Loss = 7.7352e-02, PNorm = 84.8636, GNorm = 0.5705, lr_0 = 1.5472e-04
Loss = 9.2759e-02, PNorm = 84.8657, GNorm = 0.6481, lr_0 = 1.5462e-04
Loss = 7.9270e-02, PNorm = 84.8669, GNorm = 0.4627, lr_0 = 1.5451e-04
Loss = 9.1699e-02, PNorm = 84.8696, GNorm = 0.6614, lr_0 = 1.5440e-04
Loss = 8.0039e-02, PNorm = 84.8707, GNorm = 0.5673, lr_0 = 1.5430e-04
Loss = 7.7519e-02, PNorm = 84.8729, GNorm = 0.6846, lr_0 = 1.5419e-04
Loss = 8.2800e-02, PNorm = 84.8767, GNorm = 0.6258, lr_0 = 1.5409e-04
Loss = 8.5443e-02, PNorm = 84.8810, GNorm = 0.5600, lr_0 = 1.5398e-04
Loss = 8.2734e-02, PNorm = 84.8848, GNorm = 0.7608, lr_0 = 1.5388e-04
Loss = 8.1242e-02, PNorm = 84.8880, GNorm = 0.5087, lr_0 = 1.5377e-04
Loss = 7.6523e-02, PNorm = 84.8897, GNorm = 0.4837, lr_0 = 1.5367e-04
Loss = 7.7547e-02, PNorm = 84.8912, GNorm = 0.6551, lr_0 = 1.5356e-04
Loss = 8.3941e-02, PNorm = 84.8956, GNorm = 0.6852, lr_0 = 1.5346e-04
Loss = 9.1843e-02, PNorm = 84.8984, GNorm = 0.6895, lr_0 = 1.5335e-04
Loss = 9.3940e-02, PNorm = 84.9008, GNorm = 0.5710, lr_0 = 1.5325e-04
Loss = 8.6539e-02, PNorm = 84.9048, GNorm = 0.8685, lr_0 = 1.5314e-04
Loss = 8.4254e-02, PNorm = 84.9076, GNorm = 0.6472, lr_0 = 1.5304e-04
Loss = 7.9815e-02, PNorm = 84.9090, GNorm = 0.8209, lr_0 = 1.5293e-04
Loss = 8.1645e-02, PNorm = 84.9107, GNorm = 0.5200, lr_0 = 1.5283e-04
Loss = 8.2227e-02, PNorm = 84.9126, GNorm = 0.5505, lr_0 = 1.5272e-04
Loss = 9.6118e-02, PNorm = 84.9142, GNorm = 0.9812, lr_0 = 1.5262e-04
Loss = 9.2142e-02, PNorm = 84.9163, GNorm = 0.9971, lr_0 = 1.5251e-04
Loss = 8.4262e-02, PNorm = 84.9190, GNorm = 0.7029, lr_0 = 1.5241e-04
Loss = 8.6463e-02, PNorm = 84.9212, GNorm = 0.7386, lr_0 = 1.5230e-04
Loss = 8.5421e-02, PNorm = 84.9220, GNorm = 0.7302, lr_0 = 1.5220e-04
Loss = 9.9187e-02, PNorm = 84.9229, GNorm = 0.6276, lr_0 = 1.5209e-04
Loss = 7.6832e-02, PNorm = 84.9259, GNorm = 0.6396, lr_0 = 1.5199e-04
Loss = 7.2455e-02, PNorm = 84.9285, GNorm = 0.5104, lr_0 = 1.5189e-04
Loss = 9.1337e-02, PNorm = 84.9306, GNorm = 0.6042, lr_0 = 1.5178e-04
Loss = 8.3129e-02, PNorm = 84.9319, GNorm = 0.5446, lr_0 = 1.5168e-04
Loss = 7.9364e-02, PNorm = 84.9341, GNorm = 0.5478, lr_0 = 1.5157e-04
Loss = 8.0452e-02, PNorm = 84.9358, GNorm = 0.6962, lr_0 = 1.5147e-04
Loss = 8.6366e-02, PNorm = 84.9376, GNorm = 0.6449, lr_0 = 1.5137e-04
Loss = 8.4527e-02, PNorm = 84.9416, GNorm = 0.5619, lr_0 = 1.5126e-04
Loss = 8.9113e-02, PNorm = 84.9450, GNorm = 0.6994, lr_0 = 1.5116e-04
Loss = 7.6245e-02, PNorm = 84.9481, GNorm = 0.6087, lr_0 = 1.5106e-04
Loss = 8.6791e-02, PNorm = 84.9516, GNorm = 0.5024, lr_0 = 1.5095e-04
Loss = 8.4314e-02, PNorm = 84.9545, GNorm = 0.6489, lr_0 = 1.5085e-04
Validation mae = 0.226591
Epoch 25
Loss = 8.2843e-02, PNorm = 84.9574, GNorm = 0.5817, lr_0 = 1.5075e-04
Loss = 8.9442e-02, PNorm = 84.9563, GNorm = 0.5419, lr_0 = 1.5064e-04
Loss = 7.2584e-02, PNorm = 84.9569, GNorm = 0.5760, lr_0 = 1.5054e-04
Loss = 8.0663e-02, PNorm = 84.9593, GNorm = 0.8664, lr_0 = 1.5044e-04
Loss = 6.9152e-02, PNorm = 84.9615, GNorm = 0.7072, lr_0 = 1.5033e-04
Loss = 7.4022e-02, PNorm = 84.9625, GNorm = 0.7298, lr_0 = 1.5023e-04
Loss = 7.0521e-02, PNorm = 84.9629, GNorm = 0.5962, lr_0 = 1.5013e-04
Loss = 8.2771e-02, PNorm = 84.9655, GNorm = 0.8197, lr_0 = 1.5002e-04
Loss = 6.9168e-02, PNorm = 84.9666, GNorm = 0.6168, lr_0 = 1.4992e-04
Loss = 9.1281e-02, PNorm = 84.9669, GNorm = 0.5223, lr_0 = 1.4982e-04
Loss = 8.0188e-02, PNorm = 84.9695, GNorm = 0.6355, lr_0 = 1.4972e-04
Loss = 8.3509e-02, PNorm = 84.9729, GNorm = 0.6711, lr_0 = 1.4961e-04
Loss = 7.4549e-02, PNorm = 84.9752, GNorm = 0.6580, lr_0 = 1.4951e-04
Loss = 8.1230e-02, PNorm = 84.9752, GNorm = 0.6633, lr_0 = 1.4941e-04
Loss = 7.2558e-02, PNorm = 84.9749, GNorm = 0.7293, lr_0 = 1.4931e-04
Loss = 8.0964e-02, PNorm = 84.9767, GNorm = 0.6806, lr_0 = 1.4920e-04
Loss = 8.4028e-02, PNorm = 84.9808, GNorm = 0.5402, lr_0 = 1.4910e-04
Loss = 8.9377e-02, PNorm = 84.9834, GNorm = 0.7780, lr_0 = 1.4900e-04
Loss = 7.2616e-02, PNorm = 84.9844, GNorm = 0.5849, lr_0 = 1.4890e-04
Loss = 9.5076e-02, PNorm = 84.9887, GNorm = 0.6483, lr_0 = 1.4880e-04
Loss = 7.8659e-02, PNorm = 84.9928, GNorm = 0.6027, lr_0 = 1.4869e-04
Loss = 8.3678e-02, PNorm = 84.9937, GNorm = 0.6682, lr_0 = 1.4859e-04
Loss = 7.9130e-02, PNorm = 84.9972, GNorm = 0.6725, lr_0 = 1.4849e-04
Loss = 7.5786e-02, PNorm = 85.0013, GNorm = 0.6357, lr_0 = 1.4839e-04
Loss = 8.7839e-02, PNorm = 85.0045, GNorm = 0.9332, lr_0 = 1.4829e-04
Loss = 8.0788e-02, PNorm = 85.0059, GNorm = 0.4634, lr_0 = 1.4818e-04
Loss = 7.7302e-02, PNorm = 85.0080, GNorm = 0.5086, lr_0 = 1.4808e-04
Loss = 7.7983e-02, PNorm = 85.0111, GNorm = 0.6782, lr_0 = 1.4798e-04
Loss = 7.4750e-02, PNorm = 85.0146, GNorm = 0.9667, lr_0 = 1.4788e-04
Loss = 7.2752e-02, PNorm = 85.0163, GNorm = 0.6156, lr_0 = 1.4778e-04
Loss = 7.9275e-02, PNorm = 85.0191, GNorm = 0.5418, lr_0 = 1.4768e-04
Loss = 7.2655e-02, PNorm = 85.0217, GNorm = 0.7069, lr_0 = 1.4758e-04
Loss = 7.8006e-02, PNorm = 85.0247, GNorm = 0.5752, lr_0 = 1.4748e-04
Loss = 8.5669e-02, PNorm = 85.0280, GNorm = 0.4837, lr_0 = 1.4737e-04
Loss = 9.1874e-02, PNorm = 85.0323, GNorm = 0.7620, lr_0 = 1.4727e-04
Loss = 7.0564e-02, PNorm = 85.0368, GNorm = 0.4998, lr_0 = 1.4717e-04
Loss = 7.5276e-02, PNorm = 85.0394, GNorm = 0.4901, lr_0 = 1.4707e-04
Loss = 8.5729e-02, PNorm = 85.0412, GNorm = 0.7615, lr_0 = 1.4697e-04
Loss = 7.5675e-02, PNorm = 85.0442, GNorm = 0.4915, lr_0 = 1.4687e-04
Loss = 8.2210e-02, PNorm = 85.0459, GNorm = 0.6624, lr_0 = 1.4677e-04
Loss = 8.7809e-02, PNorm = 85.0483, GNorm = 0.5662, lr_0 = 1.4667e-04
Loss = 7.2980e-02, PNorm = 85.0494, GNorm = 0.7294, lr_0 = 1.4657e-04
Loss = 7.7280e-02, PNorm = 85.0509, GNorm = 0.6483, lr_0 = 1.4647e-04
Loss = 8.1585e-02, PNorm = 85.0523, GNorm = 0.5931, lr_0 = 1.4637e-04
Loss = 7.2549e-02, PNorm = 85.0546, GNorm = 0.6180, lr_0 = 1.4627e-04
Loss = 8.5395e-02, PNorm = 85.0567, GNorm = 0.5745, lr_0 = 1.4617e-04
Loss = 8.6569e-02, PNorm = 85.0595, GNorm = 0.6441, lr_0 = 1.4607e-04
Loss = 7.7407e-02, PNorm = 85.0640, GNorm = 0.6980, lr_0 = 1.4597e-04
Loss = 7.5744e-02, PNorm = 85.0655, GNorm = 0.8021, lr_0 = 1.4587e-04
Loss = 8.7658e-02, PNorm = 85.0684, GNorm = 0.7106, lr_0 = 1.4577e-04
Loss = 8.5317e-02, PNorm = 85.0697, GNorm = 0.7842, lr_0 = 1.4567e-04
Loss = 8.9342e-02, PNorm = 85.0725, GNorm = 0.6446, lr_0 = 1.4557e-04
Loss = 7.9635e-02, PNorm = 85.0748, GNorm = 0.7103, lr_0 = 1.4547e-04
Loss = 8.1856e-02, PNorm = 85.0760, GNorm = 0.6682, lr_0 = 1.4537e-04
Loss = 7.5657e-02, PNorm = 85.0787, GNorm = 0.6819, lr_0 = 1.4527e-04
Loss = 8.5867e-02, PNorm = 85.0810, GNorm = 0.7094, lr_0 = 1.4517e-04
Loss = 7.7028e-02, PNorm = 85.0851, GNorm = 0.6618, lr_0 = 1.4507e-04
Loss = 7.7837e-02, PNorm = 85.0885, GNorm = 0.5684, lr_0 = 1.4497e-04
Loss = 8.9041e-02, PNorm = 85.0913, GNorm = 0.8815, lr_0 = 1.4487e-04
Loss = 7.9218e-02, PNorm = 85.0916, GNorm = 0.5065, lr_0 = 1.4477e-04
Loss = 7.6708e-02, PNorm = 85.0921, GNorm = 0.6034, lr_0 = 1.4467e-04
Loss = 9.2179e-02, PNorm = 85.0948, GNorm = 0.6478, lr_0 = 1.4457e-04
Loss = 8.3646e-02, PNorm = 85.0964, GNorm = 0.8004, lr_0 = 1.4447e-04
Loss = 7.3675e-02, PNorm = 85.0984, GNorm = 0.6840, lr_0 = 1.4438e-04
Loss = 8.8286e-02, PNorm = 85.1004, GNorm = 0.7537, lr_0 = 1.4428e-04
Loss = 9.1798e-02, PNorm = 85.1035, GNorm = 0.9599, lr_0 = 1.4418e-04
Loss = 9.7936e-02, PNorm = 85.1065, GNorm = 0.6760, lr_0 = 1.4408e-04
Loss = 8.6045e-02, PNorm = 85.1096, GNorm = 0.9223, lr_0 = 1.4398e-04
Loss = 7.8950e-02, PNorm = 85.1111, GNorm = 0.5979, lr_0 = 1.4388e-04
Loss = 7.9927e-02, PNorm = 85.1127, GNorm = 0.7577, lr_0 = 1.4378e-04
Loss = 7.3861e-02, PNorm = 85.1140, GNorm = 0.8799, lr_0 = 1.4368e-04
Loss = 7.7196e-02, PNorm = 85.1147, GNorm = 0.8575, lr_0 = 1.4359e-04
Loss = 8.3053e-02, PNorm = 85.1141, GNorm = 0.5389, lr_0 = 1.4349e-04
Loss = 9.1078e-02, PNorm = 85.1164, GNorm = 0.8021, lr_0 = 1.4339e-04
Loss = 7.9392e-02, PNorm = 85.1197, GNorm = 0.5356, lr_0 = 1.4329e-04
Loss = 7.5643e-02, PNorm = 85.1220, GNorm = 0.5374, lr_0 = 1.4319e-04
Loss = 8.3430e-02, PNorm = 85.1242, GNorm = 0.6607, lr_0 = 1.4310e-04
Loss = 8.7480e-02, PNorm = 85.1250, GNorm = 0.5990, lr_0 = 1.4300e-04
Loss = 8.2869e-02, PNorm = 85.1269, GNorm = 0.5105, lr_0 = 1.4290e-04
Loss = 8.5043e-02, PNorm = 85.1308, GNorm = 0.5647, lr_0 = 1.4280e-04
Loss = 9.2954e-02, PNorm = 85.1332, GNorm = 0.6341, lr_0 = 1.4270e-04
Loss = 7.3763e-02, PNorm = 85.1373, GNorm = 0.6397, lr_0 = 1.4261e-04
Loss = 7.6231e-02, PNorm = 85.1431, GNorm = 0.5907, lr_0 = 1.4251e-04
Loss = 7.6572e-02, PNorm = 85.1467, GNorm = 0.6520, lr_0 = 1.4241e-04
Loss = 8.4707e-02, PNorm = 85.1484, GNorm = 0.5774, lr_0 = 1.4231e-04
Loss = 8.1863e-02, PNorm = 85.1486, GNorm = 0.7370, lr_0 = 1.4222e-04
Loss = 8.4659e-02, PNorm = 85.1495, GNorm = 0.6802, lr_0 = 1.4212e-04
Loss = 8.8297e-02, PNorm = 85.1523, GNorm = 0.7078, lr_0 = 1.4202e-04
Loss = 8.8722e-02, PNorm = 85.1546, GNorm = 0.6328, lr_0 = 1.4192e-04
Loss = 8.8126e-02, PNorm = 85.1556, GNorm = 0.5611, lr_0 = 1.4183e-04
Loss = 8.9432e-02, PNorm = 85.1567, GNorm = 0.6839, lr_0 = 1.4173e-04
Loss = 6.7775e-02, PNorm = 85.1580, GNorm = 0.5786, lr_0 = 1.4163e-04
Loss = 7.5929e-02, PNorm = 85.1601, GNorm = 0.6137, lr_0 = 1.4153e-04
Loss = 8.1392e-02, PNorm = 85.1631, GNorm = 0.6025, lr_0 = 1.4144e-04
Loss = 7.6060e-02, PNorm = 85.1643, GNorm = 0.6184, lr_0 = 1.4134e-04
Loss = 8.0643e-02, PNorm = 85.1672, GNorm = 0.5290, lr_0 = 1.4124e-04
Loss = 7.7988e-02, PNorm = 85.1689, GNorm = 0.5192, lr_0 = 1.4115e-04
Loss = 7.7522e-02, PNorm = 85.1702, GNorm = 0.8649, lr_0 = 1.4105e-04
Loss = 8.0448e-02, PNorm = 85.1717, GNorm = 0.7151, lr_0 = 1.4095e-04
Loss = 8.2805e-02, PNorm = 85.1742, GNorm = 0.7334, lr_0 = 1.4086e-04
Loss = 7.2605e-02, PNorm = 85.1765, GNorm = 0.5675, lr_0 = 1.4076e-04
Loss = 7.7669e-02, PNorm = 85.1777, GNorm = 0.6125, lr_0 = 1.4066e-04
Loss = 7.4630e-02, PNorm = 85.1782, GNorm = 0.6961, lr_0 = 1.4057e-04
Loss = 8.5566e-02, PNorm = 85.1793, GNorm = 0.5583, lr_0 = 1.4047e-04
Loss = 7.9523e-02, PNorm = 85.1823, GNorm = 0.6753, lr_0 = 1.4038e-04
Loss = 8.8156e-02, PNorm = 85.1856, GNorm = 0.6901, lr_0 = 1.4028e-04
Loss = 7.9380e-02, PNorm = 85.1876, GNorm = 0.5321, lr_0 = 1.4018e-04
Loss = 7.7337e-02, PNorm = 85.1899, GNorm = 0.8745, lr_0 = 1.4009e-04
Loss = 8.1503e-02, PNorm = 85.1928, GNorm = 0.6536, lr_0 = 1.3999e-04
Loss = 8.0528e-02, PNorm = 85.1952, GNorm = 0.7329, lr_0 = 1.3990e-04
Loss = 7.4967e-02, PNorm = 85.1977, GNorm = 0.6163, lr_0 = 1.3980e-04
Loss = 7.4156e-02, PNorm = 85.1982, GNorm = 0.6935, lr_0 = 1.3970e-04
Loss = 7.3124e-02, PNorm = 85.1987, GNorm = 0.5842, lr_0 = 1.3961e-04
Loss = 8.8683e-02, PNorm = 85.2014, GNorm = 0.6989, lr_0 = 1.3951e-04
Loss = 6.2187e-02, PNorm = 85.2037, GNorm = 0.6372, lr_0 = 1.3942e-04
Loss = 7.6395e-02, PNorm = 85.2058, GNorm = 0.6723, lr_0 = 1.3932e-04
Loss = 8.3683e-02, PNorm = 85.2074, GNorm = 0.6114, lr_0 = 1.3923e-04
Loss = 8.2354e-02, PNorm = 85.2089, GNorm = 0.7216, lr_0 = 1.3913e-04
Loss = 8.6124e-02, PNorm = 85.2107, GNorm = 0.6859, lr_0 = 1.3904e-04
Loss = 6.5843e-02, PNorm = 85.2141, GNorm = 0.5525, lr_0 = 1.3894e-04
Validation mae = 0.227304
Epoch 26
Loss = 7.3860e-02, PNorm = 85.2157, GNorm = 0.6355, lr_0 = 1.3884e-04
Loss = 7.4965e-02, PNorm = 85.2176, GNorm = 0.6005, lr_0 = 1.3875e-04
Loss = 7.3175e-02, PNorm = 85.2206, GNorm = 0.5296, lr_0 = 1.3865e-04
Loss = 7.2326e-02, PNorm = 85.2219, GNorm = 0.5729, lr_0 = 1.3856e-04
Loss = 8.3333e-02, PNorm = 85.2235, GNorm = 0.4923, lr_0 = 1.3846e-04
Loss = 7.3396e-02, PNorm = 85.2251, GNorm = 0.5575, lr_0 = 1.3837e-04
Loss = 6.6430e-02, PNorm = 85.2260, GNorm = 0.5958, lr_0 = 1.3828e-04
Loss = 8.6828e-02, PNorm = 85.2277, GNorm = 0.9716, lr_0 = 1.3818e-04
Loss = 6.6980e-02, PNorm = 85.2283, GNorm = 0.6415, lr_0 = 1.3809e-04
Loss = 7.2935e-02, PNorm = 85.2296, GNorm = 0.4832, lr_0 = 1.3799e-04
Loss = 8.4308e-02, PNorm = 85.2313, GNorm = 0.6201, lr_0 = 1.3790e-04
Loss = 8.3095e-02, PNorm = 85.2336, GNorm = 0.6911, lr_0 = 1.3780e-04
Loss = 8.2004e-02, PNorm = 85.2346, GNorm = 0.6357, lr_0 = 1.3771e-04
Loss = 7.8983e-02, PNorm = 85.2364, GNorm = 0.7822, lr_0 = 1.3761e-04
Loss = 8.2166e-02, PNorm = 85.2386, GNorm = 0.6692, lr_0 = 1.3752e-04
Loss = 7.9257e-02, PNorm = 85.2406, GNorm = 0.6791, lr_0 = 1.3742e-04
Loss = 7.9130e-02, PNorm = 85.2413, GNorm = 0.7438, lr_0 = 1.3733e-04
Loss = 7.2305e-02, PNorm = 85.2426, GNorm = 0.4691, lr_0 = 1.3724e-04
Loss = 7.8911e-02, PNorm = 85.2455, GNorm = 0.6442, lr_0 = 1.3714e-04
Loss = 7.7107e-02, PNorm = 85.2480, GNorm = 0.4892, lr_0 = 1.3705e-04
Loss = 6.9994e-02, PNorm = 85.2474, GNorm = 0.5463, lr_0 = 1.3695e-04
Loss = 7.9465e-02, PNorm = 85.2481, GNorm = 0.7419, lr_0 = 1.3686e-04
Loss = 8.0510e-02, PNorm = 85.2503, GNorm = 0.4651, lr_0 = 1.3677e-04
Loss = 6.9204e-02, PNorm = 85.2518, GNorm = 0.6361, lr_0 = 1.3667e-04
Loss = 8.6190e-02, PNorm = 85.2542, GNorm = 0.6690, lr_0 = 1.3658e-04
Loss = 7.3273e-02, PNorm = 85.2566, GNorm = 0.5811, lr_0 = 1.3649e-04
Loss = 7.2057e-02, PNorm = 85.2592, GNorm = 0.5853, lr_0 = 1.3639e-04
Loss = 7.9800e-02, PNorm = 85.2615, GNorm = 0.5871, lr_0 = 1.3630e-04
Loss = 7.0414e-02, PNorm = 85.2652, GNorm = 0.6221, lr_0 = 1.3621e-04
Loss = 7.8129e-02, PNorm = 85.2661, GNorm = 0.5271, lr_0 = 1.3611e-04
Loss = 7.7260e-02, PNorm = 85.2677, GNorm = 0.7553, lr_0 = 1.3602e-04
Loss = 8.8533e-02, PNorm = 85.2707, GNorm = 0.7082, lr_0 = 1.3593e-04
Loss = 8.5641e-02, PNorm = 85.2734, GNorm = 0.4793, lr_0 = 1.3583e-04
Loss = 7.7410e-02, PNorm = 85.2751, GNorm = 0.5730, lr_0 = 1.3574e-04
Loss = 8.6920e-02, PNorm = 85.2780, GNorm = 0.4948, lr_0 = 1.3565e-04
Loss = 7.2640e-02, PNorm = 85.2799, GNorm = 0.6385, lr_0 = 1.3555e-04
Loss = 8.2175e-02, PNorm = 85.2827, GNorm = 0.5107, lr_0 = 1.3546e-04
Loss = 9.1135e-02, PNorm = 85.2855, GNorm = 0.8116, lr_0 = 1.3537e-04
Loss = 7.2082e-02, PNorm = 85.2871, GNorm = 0.5214, lr_0 = 1.3528e-04
Loss = 7.5805e-02, PNorm = 85.2898, GNorm = 0.5475, lr_0 = 1.3518e-04
Loss = 7.3736e-02, PNorm = 85.2931, GNorm = 0.7027, lr_0 = 1.3509e-04
Loss = 6.9758e-02, PNorm = 85.2950, GNorm = 0.5605, lr_0 = 1.3500e-04
Loss = 7.4853e-02, PNorm = 85.2960, GNorm = 0.7289, lr_0 = 1.3491e-04
Loss = 9.2478e-02, PNorm = 85.2975, GNorm = 0.6493, lr_0 = 1.3481e-04
Loss = 7.8932e-02, PNorm = 85.3000, GNorm = 0.6129, lr_0 = 1.3472e-04
Loss = 8.6724e-02, PNorm = 85.3020, GNorm = 0.5552, lr_0 = 1.3463e-04
Loss = 7.8069e-02, PNorm = 85.3040, GNorm = 0.8897, lr_0 = 1.3454e-04
Loss = 7.3357e-02, PNorm = 85.3035, GNorm = 0.7239, lr_0 = 1.3444e-04
Loss = 7.8601e-02, PNorm = 85.3033, GNorm = 0.5141, lr_0 = 1.3435e-04
Loss = 7.5950e-02, PNorm = 85.3040, GNorm = 0.5000, lr_0 = 1.3426e-04
Loss = 7.7655e-02, PNorm = 85.3057, GNorm = 0.6357, lr_0 = 1.3417e-04
Loss = 7.7949e-02, PNorm = 85.3081, GNorm = 0.8163, lr_0 = 1.3408e-04
Loss = 9.0097e-02, PNorm = 85.3120, GNorm = 1.2331, lr_0 = 1.3398e-04
Loss = 6.8617e-02, PNorm = 85.3134, GNorm = 0.5264, lr_0 = 1.3389e-04
Loss = 8.3104e-02, PNorm = 85.3155, GNorm = 1.1640, lr_0 = 1.3380e-04
Loss = 8.9415e-02, PNorm = 85.3190, GNorm = 0.5616, lr_0 = 1.3371e-04
Loss = 8.4992e-02, PNorm = 85.3220, GNorm = 0.6163, lr_0 = 1.3362e-04
Loss = 7.2814e-02, PNorm = 85.3218, GNorm = 0.7043, lr_0 = 1.3353e-04
Loss = 8.2306e-02, PNorm = 85.3220, GNorm = 0.4277, lr_0 = 1.3343e-04
Loss = 8.3174e-02, PNorm = 85.3240, GNorm = 0.5631, lr_0 = 1.3334e-04
Loss = 7.7995e-02, PNorm = 85.3262, GNorm = 0.5587, lr_0 = 1.3325e-04
Loss = 7.4857e-02, PNorm = 85.3288, GNorm = 0.7480, lr_0 = 1.3316e-04
Loss = 8.4369e-02, PNorm = 85.3326, GNorm = 0.6485, lr_0 = 1.3307e-04
Loss = 8.6781e-02, PNorm = 85.3366, GNorm = 0.5306, lr_0 = 1.3298e-04
Loss = 9.3012e-02, PNorm = 85.3393, GNorm = 0.4412, lr_0 = 1.3289e-04
Loss = 8.2987e-02, PNorm = 85.3404, GNorm = 0.6310, lr_0 = 1.3280e-04
Loss = 7.9463e-02, PNorm = 85.3436, GNorm = 0.4439, lr_0 = 1.3270e-04
Loss = 8.7029e-02, PNorm = 85.3486, GNorm = 0.5656, lr_0 = 1.3261e-04
Loss = 8.2615e-02, PNorm = 85.3507, GNorm = 0.7478, lr_0 = 1.3252e-04
Loss = 7.9602e-02, PNorm = 85.3512, GNorm = 0.6099, lr_0 = 1.3243e-04
Loss = 7.9140e-02, PNorm = 85.3535, GNorm = 0.5233, lr_0 = 1.3234e-04
Loss = 7.3925e-02, PNorm = 85.3559, GNorm = 0.6385, lr_0 = 1.3225e-04
Loss = 7.5877e-02, PNorm = 85.3583, GNorm = 0.6796, lr_0 = 1.3216e-04
Loss = 8.2302e-02, PNorm = 85.3587, GNorm = 1.1031, lr_0 = 1.3207e-04
Loss = 8.6018e-02, PNorm = 85.3599, GNorm = 0.6582, lr_0 = 1.3198e-04
Loss = 7.9361e-02, PNorm = 85.3612, GNorm = 0.6188, lr_0 = 1.3189e-04
Loss = 7.9204e-02, PNorm = 85.3625, GNorm = 0.8018, lr_0 = 1.3180e-04
Loss = 9.1829e-02, PNorm = 85.3643, GNorm = 0.6678, lr_0 = 1.3171e-04
Loss = 7.1436e-02, PNorm = 85.3656, GNorm = 0.6029, lr_0 = 1.3162e-04
Loss = 7.9810e-02, PNorm = 85.3677, GNorm = 0.7220, lr_0 = 1.3153e-04
Loss = 7.5869e-02, PNorm = 85.3698, GNorm = 0.7782, lr_0 = 1.3144e-04
Loss = 7.7399e-02, PNorm = 85.3712, GNorm = 0.5722, lr_0 = 1.3135e-04
Loss = 8.0024e-02, PNorm = 85.3733, GNorm = 0.6260, lr_0 = 1.3126e-04
Loss = 8.5951e-02, PNorm = 85.3763, GNorm = 0.5712, lr_0 = 1.3117e-04
Loss = 8.4239e-02, PNorm = 85.3774, GNorm = 0.5268, lr_0 = 1.3108e-04
Loss = 7.6537e-02, PNorm = 85.3791, GNorm = 0.6532, lr_0 = 1.3099e-04
Loss = 7.0317e-02, PNorm = 85.3815, GNorm = 0.5421, lr_0 = 1.3090e-04
Loss = 8.4764e-02, PNorm = 85.3840, GNorm = 0.7006, lr_0 = 1.3081e-04
Loss = 7.9168e-02, PNorm = 85.3852, GNorm = 0.6220, lr_0 = 1.3072e-04
Loss = 8.5862e-02, PNorm = 85.3846, GNorm = 0.6464, lr_0 = 1.3063e-04
Loss = 7.8522e-02, PNorm = 85.3870, GNorm = 0.7281, lr_0 = 1.3054e-04
Loss = 7.8487e-02, PNorm = 85.3889, GNorm = 0.5486, lr_0 = 1.3045e-04
Loss = 7.4875e-02, PNorm = 85.3915, GNorm = 0.4725, lr_0 = 1.3036e-04
Loss = 7.1666e-02, PNorm = 85.3933, GNorm = 0.5971, lr_0 = 1.3027e-04
Loss = 6.3506e-02, PNorm = 85.3953, GNorm = 0.5309, lr_0 = 1.3018e-04
Loss = 7.7872e-02, PNorm = 85.3965, GNorm = 0.7187, lr_0 = 1.3009e-04
Loss = 7.2546e-02, PNorm = 85.3981, GNorm = 0.6620, lr_0 = 1.3000e-04
Loss = 7.9721e-02, PNorm = 85.3984, GNorm = 0.7334, lr_0 = 1.2992e-04
Loss = 6.8603e-02, PNorm = 85.3997, GNorm = 0.6132, lr_0 = 1.2983e-04
Loss = 8.0898e-02, PNorm = 85.4019, GNorm = 0.8528, lr_0 = 1.2974e-04
Loss = 7.0739e-02, PNorm = 85.4034, GNorm = 0.5688, lr_0 = 1.2965e-04
Loss = 8.4736e-02, PNorm = 85.4046, GNorm = 0.7383, lr_0 = 1.2956e-04
Loss = 8.3167e-02, PNorm = 85.4080, GNorm = 0.7529, lr_0 = 1.2947e-04
Loss = 8.2711e-02, PNorm = 85.4117, GNorm = 0.7670, lr_0 = 1.2938e-04
Loss = 7.9985e-02, PNorm = 85.4119, GNorm = 0.8729, lr_0 = 1.2929e-04
Loss = 7.8743e-02, PNorm = 85.4133, GNorm = 0.6013, lr_0 = 1.2921e-04
Loss = 7.7544e-02, PNorm = 85.4147, GNorm = 0.6300, lr_0 = 1.2912e-04
Loss = 7.9989e-02, PNorm = 85.4155, GNorm = 0.7259, lr_0 = 1.2903e-04
Loss = 7.7708e-02, PNorm = 85.4162, GNorm = 0.4657, lr_0 = 1.2894e-04
Loss = 8.2983e-02, PNorm = 85.4190, GNorm = 0.6087, lr_0 = 1.2885e-04
Loss = 7.5354e-02, PNorm = 85.4231, GNorm = 0.6186, lr_0 = 1.2876e-04
Loss = 8.2591e-02, PNorm = 85.4252, GNorm = 0.5372, lr_0 = 1.2867e-04
Loss = 8.0596e-02, PNorm = 85.4261, GNorm = 0.5739, lr_0 = 1.2859e-04
Loss = 8.6881e-02, PNorm = 85.4267, GNorm = 0.7108, lr_0 = 1.2850e-04
Loss = 8.8448e-02, PNorm = 85.4288, GNorm = 0.4471, lr_0 = 1.2841e-04
Loss = 8.0310e-02, PNorm = 85.4314, GNorm = 0.6681, lr_0 = 1.2832e-04
Loss = 8.8875e-02, PNorm = 85.4339, GNorm = 0.6223, lr_0 = 1.2823e-04
Loss = 8.3411e-02, PNorm = 85.4377, GNorm = 0.5978, lr_0 = 1.2815e-04
Loss = 8.0386e-02, PNorm = 85.4407, GNorm = 0.5702, lr_0 = 1.2806e-04
Loss = 8.8237e-02, PNorm = 85.4414, GNorm = 0.8224, lr_0 = 1.2797e-04
Validation mae = 0.225124
Epoch 27
Loss = 7.4825e-02, PNorm = 85.4427, GNorm = 0.7237, lr_0 = 1.2788e-04
Loss = 7.7173e-02, PNorm = 85.4447, GNorm = 0.6431, lr_0 = 1.2780e-04
Loss = 7.4980e-02, PNorm = 85.4464, GNorm = 0.6431, lr_0 = 1.2771e-04
Loss = 6.5570e-02, PNorm = 85.4489, GNorm = 1.0235, lr_0 = 1.2762e-04
Loss = 6.7478e-02, PNorm = 85.4509, GNorm = 0.5272, lr_0 = 1.2753e-04
Loss = 8.3147e-02, PNorm = 85.4537, GNorm = 0.5588, lr_0 = 1.2745e-04
Loss = 7.1327e-02, PNorm = 85.4561, GNorm = 0.5981, lr_0 = 1.2736e-04
Loss = 7.8732e-02, PNorm = 85.4582, GNorm = 0.6540, lr_0 = 1.2727e-04
Loss = 8.1147e-02, PNorm = 85.4613, GNorm = 0.5392, lr_0 = 1.2718e-04
Loss = 7.1409e-02, PNorm = 85.4628, GNorm = 0.7387, lr_0 = 1.2710e-04
Loss = 8.3065e-02, PNorm = 85.4654, GNorm = 0.6669, lr_0 = 1.2701e-04
Loss = 6.6849e-02, PNorm = 85.4671, GNorm = 0.4406, lr_0 = 1.2692e-04
Loss = 7.1505e-02, PNorm = 85.4682, GNorm = 0.4965, lr_0 = 1.2684e-04
Loss = 6.9878e-02, PNorm = 85.4691, GNorm = 0.5094, lr_0 = 1.2675e-04
Loss = 7.4800e-02, PNorm = 85.4704, GNorm = 0.6618, lr_0 = 1.2666e-04
Loss = 8.4731e-02, PNorm = 85.4711, GNorm = 0.6727, lr_0 = 1.2658e-04
Loss = 8.4886e-02, PNorm = 85.4718, GNorm = 0.6133, lr_0 = 1.2649e-04
Loss = 8.2277e-02, PNorm = 85.4730, GNorm = 0.7607, lr_0 = 1.2640e-04
Loss = 7.9133e-02, PNorm = 85.4742, GNorm = 0.6215, lr_0 = 1.2632e-04
Loss = 7.6405e-02, PNorm = 85.4760, GNorm = 0.8346, lr_0 = 1.2623e-04
Loss = 7.2489e-02, PNorm = 85.4761, GNorm = 0.7266, lr_0 = 1.2614e-04
Loss = 8.3797e-02, PNorm = 85.4773, GNorm = 0.7288, lr_0 = 1.2606e-04
Loss = 7.0140e-02, PNorm = 85.4793, GNorm = 0.4198, lr_0 = 1.2597e-04
Loss = 7.0802e-02, PNorm = 85.4803, GNorm = 0.5208, lr_0 = 1.2588e-04
Loss = 7.5966e-02, PNorm = 85.4826, GNorm = 0.5569, lr_0 = 1.2580e-04
Loss = 8.8947e-02, PNorm = 85.4857, GNorm = 0.8707, lr_0 = 1.2571e-04
Loss = 7.4247e-02, PNorm = 85.4888, GNorm = 0.7112, lr_0 = 1.2563e-04
Loss = 8.6289e-02, PNorm = 85.4913, GNorm = 0.6884, lr_0 = 1.2554e-04
Loss = 8.6818e-02, PNorm = 85.4932, GNorm = 0.7797, lr_0 = 1.2545e-04
Loss = 7.4919e-02, PNorm = 85.4930, GNorm = 0.4974, lr_0 = 1.2537e-04
Loss = 7.9482e-02, PNorm = 85.4962, GNorm = 0.6918, lr_0 = 1.2528e-04
Loss = 6.9456e-02, PNorm = 85.4987, GNorm = 0.5495, lr_0 = 1.2520e-04
Loss = 6.1702e-02, PNorm = 85.4993, GNorm = 0.8179, lr_0 = 1.2511e-04
Loss = 7.9083e-02, PNorm = 85.5006, GNorm = 0.6108, lr_0 = 1.2502e-04
Loss = 7.6218e-02, PNorm = 85.5022, GNorm = 0.7153, lr_0 = 1.2494e-04
Loss = 8.4418e-02, PNorm = 85.5053, GNorm = 0.7135, lr_0 = 1.2485e-04
Loss = 7.8747e-02, PNorm = 85.5070, GNorm = 0.5695, lr_0 = 1.2477e-04
Loss = 7.6988e-02, PNorm = 85.5083, GNorm = 0.6120, lr_0 = 1.2468e-04
Loss = 7.1838e-02, PNorm = 85.5109, GNorm = 0.7714, lr_0 = 1.2460e-04
Loss = 6.8722e-02, PNorm = 85.5131, GNorm = 0.5248, lr_0 = 1.2451e-04
Loss = 7.7752e-02, PNorm = 85.5149, GNorm = 0.6458, lr_0 = 1.2443e-04
Loss = 8.1601e-02, PNorm = 85.5162, GNorm = 0.7492, lr_0 = 1.2434e-04
Loss = 6.8558e-02, PNorm = 85.5174, GNorm = 0.5147, lr_0 = 1.2426e-04
Loss = 8.2910e-02, PNorm = 85.5195, GNorm = 0.6499, lr_0 = 1.2417e-04
Loss = 7.6035e-02, PNorm = 85.5222, GNorm = 0.6238, lr_0 = 1.2409e-04
Loss = 7.2339e-02, PNorm = 85.5255, GNorm = 0.7959, lr_0 = 1.2400e-04
Loss = 7.5691e-02, PNorm = 85.5279, GNorm = 0.5496, lr_0 = 1.2392e-04
Loss = 7.2929e-02, PNorm = 85.5290, GNorm = 0.5942, lr_0 = 1.2383e-04
Loss = 7.5728e-02, PNorm = 85.5296, GNorm = 0.4622, lr_0 = 1.2375e-04
Loss = 8.8550e-02, PNorm = 85.5318, GNorm = 0.6170, lr_0 = 1.2366e-04
Loss = 7.6321e-02, PNorm = 85.5322, GNorm = 0.9385, lr_0 = 1.2358e-04
Loss = 8.5595e-02, PNorm = 85.5334, GNorm = 1.0904, lr_0 = 1.2349e-04
Loss = 7.3472e-02, PNorm = 85.5362, GNorm = 0.7214, lr_0 = 1.2341e-04
Loss = 7.3380e-02, PNorm = 85.5386, GNorm = 0.7546, lr_0 = 1.2332e-04
Loss = 7.6368e-02, PNorm = 85.5412, GNorm = 0.7769, lr_0 = 1.2324e-04
Loss = 8.0533e-02, PNorm = 85.5432, GNorm = 0.6403, lr_0 = 1.2315e-04
Loss = 7.8753e-02, PNorm = 85.5446, GNorm = 0.6125, lr_0 = 1.2307e-04
Loss = 7.9965e-02, PNorm = 85.5484, GNorm = 0.4837, lr_0 = 1.2298e-04
Loss = 7.2852e-02, PNorm = 85.5507, GNorm = 0.5704, lr_0 = 1.2290e-04
Loss = 7.8204e-02, PNorm = 85.5516, GNorm = 0.6521, lr_0 = 1.2282e-04
Loss = 6.2597e-02, PNorm = 85.5525, GNorm = 0.4194, lr_0 = 1.2273e-04
Loss = 7.8067e-02, PNorm = 85.5542, GNorm = 0.5499, lr_0 = 1.2265e-04
Loss = 8.7202e-02, PNorm = 85.5552, GNorm = 1.0182, lr_0 = 1.2256e-04
Loss = 8.3728e-02, PNorm = 85.5579, GNorm = 0.5210, lr_0 = 1.2248e-04
Loss = 6.7771e-02, PNorm = 85.5593, GNorm = 0.4783, lr_0 = 1.2240e-04
Loss = 7.9784e-02, PNorm = 85.5606, GNorm = 0.6741, lr_0 = 1.2231e-04
Loss = 6.8494e-02, PNorm = 85.5616, GNorm = 0.5816, lr_0 = 1.2223e-04
Loss = 7.7477e-02, PNorm = 85.5636, GNorm = 0.7006, lr_0 = 1.2214e-04
Loss = 8.4517e-02, PNorm = 85.5649, GNorm = 0.6618, lr_0 = 1.2206e-04
Loss = 8.2883e-02, PNorm = 85.5679, GNorm = 0.5164, lr_0 = 1.2198e-04
Loss = 7.7534e-02, PNorm = 85.5705, GNorm = 0.6804, lr_0 = 1.2189e-04
Loss = 9.2286e-02, PNorm = 85.5730, GNorm = 0.9181, lr_0 = 1.2181e-04
Loss = 8.0437e-02, PNorm = 85.5748, GNorm = 0.4931, lr_0 = 1.2173e-04
Loss = 8.0257e-02, PNorm = 85.5753, GNorm = 0.6857, lr_0 = 1.2164e-04
Loss = 7.8468e-02, PNorm = 85.5758, GNorm = 0.6143, lr_0 = 1.2156e-04
Loss = 7.6235e-02, PNorm = 85.5783, GNorm = 0.5336, lr_0 = 1.2148e-04
Loss = 7.3147e-02, PNorm = 85.5795, GNorm = 0.6093, lr_0 = 1.2139e-04
Loss = 5.4921e-02, PNorm = 85.5799, GNorm = 0.5360, lr_0 = 1.2131e-04
Loss = 8.2515e-02, PNorm = 85.5820, GNorm = 0.7546, lr_0 = 1.2123e-04
Loss = 8.2340e-02, PNorm = 85.5834, GNorm = 0.6938, lr_0 = 1.2114e-04
Loss = 8.2037e-02, PNorm = 85.5845, GNorm = 0.6848, lr_0 = 1.2106e-04
Loss = 7.8337e-02, PNorm = 85.5863, GNorm = 0.6141, lr_0 = 1.2098e-04
Loss = 8.5726e-02, PNorm = 85.5871, GNorm = 0.7765, lr_0 = 1.2090e-04
Loss = 7.3102e-02, PNorm = 85.5889, GNorm = 0.6913, lr_0 = 1.2081e-04
Loss = 7.6791e-02, PNorm = 85.5923, GNorm = 0.7450, lr_0 = 1.2073e-04
Loss = 7.8564e-02, PNorm = 85.5945, GNorm = 0.5993, lr_0 = 1.2065e-04
Loss = 7.6163e-02, PNorm = 85.5943, GNorm = 0.6435, lr_0 = 1.2056e-04
Loss = 8.3183e-02, PNorm = 85.5955, GNorm = 0.6624, lr_0 = 1.2048e-04
Loss = 6.8656e-02, PNorm = 85.5990, GNorm = 0.5197, lr_0 = 1.2040e-04
Loss = 8.2478e-02, PNorm = 85.6029, GNorm = 0.6036, lr_0 = 1.2032e-04
Loss = 8.1631e-02, PNorm = 85.6040, GNorm = 0.7370, lr_0 = 1.2023e-04
Loss = 7.3027e-02, PNorm = 85.6044, GNorm = 0.5563, lr_0 = 1.2015e-04
Loss = 7.4548e-02, PNorm = 85.6068, GNorm = 0.5148, lr_0 = 1.2007e-04
Loss = 8.0625e-02, PNorm = 85.6084, GNorm = 0.8537, lr_0 = 1.1999e-04
Loss = 8.1291e-02, PNorm = 85.6102, GNorm = 0.6217, lr_0 = 1.1991e-04
Loss = 7.1176e-02, PNorm = 85.6122, GNorm = 0.5524, lr_0 = 1.1982e-04
Loss = 7.1853e-02, PNorm = 85.6146, GNorm = 0.5558, lr_0 = 1.1974e-04
Loss = 8.0016e-02, PNorm = 85.6178, GNorm = 0.5798, lr_0 = 1.1966e-04
Loss = 7.8561e-02, PNorm = 85.6197, GNorm = 0.5744, lr_0 = 1.1958e-04
Loss = 7.6596e-02, PNorm = 85.6218, GNorm = 0.7634, lr_0 = 1.1950e-04
Loss = 8.1696e-02, PNorm = 85.6234, GNorm = 0.8618, lr_0 = 1.1941e-04
Loss = 7.3319e-02, PNorm = 85.6234, GNorm = 0.7776, lr_0 = 1.1933e-04
Loss = 7.3995e-02, PNorm = 85.6243, GNorm = 0.6507, lr_0 = 1.1925e-04
Loss = 8.4587e-02, PNorm = 85.6250, GNorm = 0.9200, lr_0 = 1.1917e-04
Loss = 8.1785e-02, PNorm = 85.6272, GNorm = 0.7148, lr_0 = 1.1909e-04
Loss = 7.9392e-02, PNorm = 85.6286, GNorm = 0.5611, lr_0 = 1.1901e-04
Loss = 7.4873e-02, PNorm = 85.6302, GNorm = 0.6576, lr_0 = 1.1892e-04
Loss = 7.6193e-02, PNorm = 85.6315, GNorm = 0.5467, lr_0 = 1.1884e-04
Loss = 7.4822e-02, PNorm = 85.6328, GNorm = 0.6013, lr_0 = 1.1876e-04
Loss = 7.9229e-02, PNorm = 85.6334, GNorm = 0.6868, lr_0 = 1.1868e-04
Loss = 8.0735e-02, PNorm = 85.6356, GNorm = 0.5392, lr_0 = 1.1860e-04
Loss = 8.5017e-02, PNorm = 85.6376, GNorm = 0.9492, lr_0 = 1.1852e-04
Loss = 7.2048e-02, PNorm = 85.6382, GNorm = 0.7657, lr_0 = 1.1844e-04
Loss = 7.7919e-02, PNorm = 85.6406, GNorm = 0.5318, lr_0 = 1.1835e-04
Loss = 8.2237e-02, PNorm = 85.6423, GNorm = 0.6271, lr_0 = 1.1827e-04
Loss = 8.0535e-02, PNorm = 85.6429, GNorm = 0.5310, lr_0 = 1.1819e-04
Loss = 7.7801e-02, PNorm = 85.6441, GNorm = 0.6314, lr_0 = 1.1811e-04
Loss = 6.7531e-02, PNorm = 85.6457, GNorm = 0.4843, lr_0 = 1.1803e-04
Loss = 8.6659e-02, PNorm = 85.6460, GNorm = 0.7020, lr_0 = 1.1795e-04
Loss = 7.7689e-02, PNorm = 85.6480, GNorm = 0.6488, lr_0 = 1.1787e-04
Validation mae = 0.226520
Epoch 28
Loss = 8.2524e-02, PNorm = 85.6499, GNorm = 0.7142, lr_0 = 1.1779e-04
Loss = 7.2998e-02, PNorm = 85.6509, GNorm = 0.4239, lr_0 = 1.1771e-04
Loss = 7.6245e-02, PNorm = 85.6523, GNorm = 0.5832, lr_0 = 1.1763e-04
Loss = 8.4275e-02, PNorm = 85.6550, GNorm = 0.6408, lr_0 = 1.1755e-04
Loss = 6.0380e-02, PNorm = 85.6571, GNorm = 0.4914, lr_0 = 1.1747e-04
Loss = 8.2812e-02, PNorm = 85.6575, GNorm = 0.8184, lr_0 = 1.1739e-04
Loss = 7.9335e-02, PNorm = 85.6586, GNorm = 0.7143, lr_0 = 1.1730e-04
Loss = 7.6372e-02, PNorm = 85.6602, GNorm = 0.5752, lr_0 = 1.1722e-04
Loss = 8.1589e-02, PNorm = 85.6635, GNorm = 0.6559, lr_0 = 1.1714e-04
Loss = 7.4027e-02, PNorm = 85.6666, GNorm = 0.6057, lr_0 = 1.1706e-04
Loss = 6.9128e-02, PNorm = 85.6676, GNorm = 0.6315, lr_0 = 1.1698e-04
Loss = 6.9418e-02, PNorm = 85.6696, GNorm = 0.4806, lr_0 = 1.1690e-04
Loss = 7.0735e-02, PNorm = 85.6725, GNorm = 0.6347, lr_0 = 1.1682e-04
Loss = 8.1686e-02, PNorm = 85.6743, GNorm = 0.9943, lr_0 = 1.1674e-04
Loss = 7.3775e-02, PNorm = 85.6752, GNorm = 0.5995, lr_0 = 1.1666e-04
Loss = 8.8547e-02, PNorm = 85.6752, GNorm = 0.5573, lr_0 = 1.1658e-04
Loss = 7.3866e-02, PNorm = 85.6764, GNorm = 0.6698, lr_0 = 1.1650e-04
Loss = 6.8921e-02, PNorm = 85.6795, GNorm = 0.5692, lr_0 = 1.1642e-04
Loss = 7.0879e-02, PNorm = 85.6813, GNorm = 0.6797, lr_0 = 1.1634e-04
Loss = 6.4728e-02, PNorm = 85.6830, GNorm = 0.7846, lr_0 = 1.1626e-04
Loss = 7.4685e-02, PNorm = 85.6842, GNorm = 0.8654, lr_0 = 1.1618e-04
Loss = 6.7716e-02, PNorm = 85.6845, GNorm = 0.4401, lr_0 = 1.1611e-04
Loss = 6.9636e-02, PNorm = 85.6872, GNorm = 0.6286, lr_0 = 1.1603e-04
Loss = 7.5309e-02, PNorm = 85.6902, GNorm = 0.6278, lr_0 = 1.1595e-04
Loss = 7.6200e-02, PNorm = 85.6918, GNorm = 0.6129, lr_0 = 1.1587e-04
Loss = 7.0403e-02, PNorm = 85.6927, GNorm = 0.5499, lr_0 = 1.1579e-04
Loss = 7.8388e-02, PNorm = 85.6949, GNorm = 0.8098, lr_0 = 1.1571e-04
Loss = 7.0721e-02, PNorm = 85.6961, GNorm = 0.5263, lr_0 = 1.1563e-04
Loss = 8.7208e-02, PNorm = 85.6984, GNorm = 0.5940, lr_0 = 1.1555e-04
Loss = 7.3681e-02, PNorm = 85.7002, GNorm = 0.7355, lr_0 = 1.1547e-04
Loss = 7.5514e-02, PNorm = 85.7037, GNorm = 0.4418, lr_0 = 1.1539e-04
Loss = 7.3565e-02, PNorm = 85.7067, GNorm = 0.7447, lr_0 = 1.1531e-04
Loss = 8.3764e-02, PNorm = 85.7074, GNorm = 0.8757, lr_0 = 1.1523e-04
Loss = 7.5942e-02, PNorm = 85.7082, GNorm = 0.4996, lr_0 = 1.1515e-04
Loss = 6.6754e-02, PNorm = 85.7097, GNorm = 0.6624, lr_0 = 1.1508e-04
Loss = 8.1998e-02, PNorm = 85.7108, GNorm = 0.8701, lr_0 = 1.1500e-04
Loss = 7.4436e-02, PNorm = 85.7124, GNorm = 0.7458, lr_0 = 1.1492e-04
Loss = 7.7106e-02, PNorm = 85.7121, GNorm = 0.4915, lr_0 = 1.1484e-04
Loss = 6.8456e-02, PNorm = 85.7137, GNorm = 0.5587, lr_0 = 1.1476e-04
Loss = 7.4761e-02, PNorm = 85.7169, GNorm = 0.6554, lr_0 = 1.1468e-04
Loss = 8.0075e-02, PNorm = 85.7181, GNorm = 0.7063, lr_0 = 1.1460e-04
Loss = 7.3286e-02, PNorm = 85.7181, GNorm = 0.6259, lr_0 = 1.1452e-04
Loss = 7.3939e-02, PNorm = 85.7193, GNorm = 0.7239, lr_0 = 1.1445e-04
Loss = 8.2337e-02, PNorm = 85.7223, GNorm = 0.6355, lr_0 = 1.1437e-04
Loss = 7.4194e-02, PNorm = 85.7243, GNorm = 0.5258, lr_0 = 1.1429e-04
Loss = 7.5932e-02, PNorm = 85.7267, GNorm = 0.9186, lr_0 = 1.1421e-04
Loss = 8.3280e-02, PNorm = 85.7272, GNorm = 0.6490, lr_0 = 1.1413e-04
Loss = 7.4835e-02, PNorm = 85.7293, GNorm = 0.6681, lr_0 = 1.1405e-04
Loss = 7.3921e-02, PNorm = 85.7328, GNorm = 0.6601, lr_0 = 1.1398e-04
Loss = 6.7618e-02, PNorm = 85.7344, GNorm = 0.6289, lr_0 = 1.1390e-04
Loss = 7.5468e-02, PNorm = 85.7347, GNorm = 0.8809, lr_0 = 1.1382e-04
Loss = 7.8549e-02, PNorm = 85.7358, GNorm = 0.6320, lr_0 = 1.1374e-04
Loss = 7.9528e-02, PNorm = 85.7376, GNorm = 0.7097, lr_0 = 1.1366e-04
Loss = 7.4134e-02, PNorm = 85.7402, GNorm = 0.6278, lr_0 = 1.1359e-04
Loss = 7.3197e-02, PNorm = 85.7429, GNorm = 0.5823, lr_0 = 1.1351e-04
Loss = 7.1418e-02, PNorm = 85.7422, GNorm = 0.8965, lr_0 = 1.1343e-04
Loss = 6.9738e-02, PNorm = 85.7438, GNorm = 0.6338, lr_0 = 1.1335e-04
Loss = 8.4474e-02, PNorm = 85.7461, GNorm = 0.6286, lr_0 = 1.1328e-04
Loss = 7.8600e-02, PNorm = 85.7488, GNorm = 0.7483, lr_0 = 1.1320e-04
Loss = 7.2028e-02, PNorm = 85.7505, GNorm = 0.7253, lr_0 = 1.1312e-04
Loss = 7.7683e-02, PNorm = 85.7515, GNorm = 0.5911, lr_0 = 1.1304e-04
Loss = 7.0568e-02, PNorm = 85.7541, GNorm = 0.5693, lr_0 = 1.1297e-04
Loss = 7.1956e-02, PNorm = 85.7566, GNorm = 0.6214, lr_0 = 1.1289e-04
Loss = 8.7203e-02, PNorm = 85.7575, GNorm = 0.6763, lr_0 = 1.1281e-04
Loss = 7.1298e-02, PNorm = 85.7587, GNorm = 0.7564, lr_0 = 1.1273e-04
Loss = 7.6053e-02, PNorm = 85.7598, GNorm = 0.7790, lr_0 = 1.1266e-04
Loss = 7.6150e-02, PNorm = 85.7596, GNorm = 0.4851, lr_0 = 1.1258e-04
Loss = 8.9780e-02, PNorm = 85.7613, GNorm = 0.6160, lr_0 = 1.1250e-04
Loss = 6.8292e-02, PNorm = 85.7631, GNorm = 0.6167, lr_0 = 1.1243e-04
Loss = 6.6463e-02, PNorm = 85.7632, GNorm = 0.6950, lr_0 = 1.1235e-04
Loss = 7.4578e-02, PNorm = 85.7632, GNorm = 0.7798, lr_0 = 1.1227e-04
Loss = 7.8884e-02, PNorm = 85.7660, GNorm = 0.6310, lr_0 = 1.1219e-04
Loss = 7.5957e-02, PNorm = 85.7688, GNorm = 0.7173, lr_0 = 1.1212e-04
Loss = 8.1806e-02, PNorm = 85.7701, GNorm = 0.6594, lr_0 = 1.1204e-04
Loss = 8.2611e-02, PNorm = 85.7720, GNorm = 0.9589, lr_0 = 1.1196e-04
Loss = 6.3906e-02, PNorm = 85.7737, GNorm = 0.5562, lr_0 = 1.1189e-04
Loss = 7.5816e-02, PNorm = 85.7749, GNorm = 0.6371, lr_0 = 1.1181e-04
Loss = 8.1186e-02, PNorm = 85.7759, GNorm = 0.6876, lr_0 = 1.1173e-04
Loss = 6.9701e-02, PNorm = 85.7779, GNorm = 0.9950, lr_0 = 1.1166e-04
Loss = 7.1284e-02, PNorm = 85.7795, GNorm = 0.5088, lr_0 = 1.1158e-04
Loss = 7.4485e-02, PNorm = 85.7799, GNorm = 0.6963, lr_0 = 1.1150e-04
Loss = 7.4009e-02, PNorm = 85.7811, GNorm = 0.6862, lr_0 = 1.1143e-04
Loss = 7.6409e-02, PNorm = 85.7823, GNorm = 0.5656, lr_0 = 1.1135e-04
Loss = 8.2730e-02, PNorm = 85.7842, GNorm = 0.9646, lr_0 = 1.1128e-04
Loss = 7.6991e-02, PNorm = 85.7862, GNorm = 0.5427, lr_0 = 1.1120e-04
Loss = 7.2312e-02, PNorm = 85.7879, GNorm = 0.7952, lr_0 = 1.1112e-04
Loss = 8.3067e-02, PNorm = 85.7909, GNorm = 0.6813, lr_0 = 1.1105e-04
Loss = 8.8383e-02, PNorm = 85.7928, GNorm = 0.6640, lr_0 = 1.1097e-04
Loss = 7.6582e-02, PNorm = 85.7951, GNorm = 0.6156, lr_0 = 1.1089e-04
Loss = 8.6003e-02, PNorm = 85.7964, GNorm = 0.7112, lr_0 = 1.1082e-04
Loss = 7.8065e-02, PNorm = 85.7988, GNorm = 0.5392, lr_0 = 1.1074e-04
Loss = 7.4368e-02, PNorm = 85.7997, GNorm = 0.6430, lr_0 = 1.1067e-04
Loss = 7.8334e-02, PNorm = 85.7999, GNorm = 0.5334, lr_0 = 1.1059e-04
Loss = 6.7529e-02, PNorm = 85.8013, GNorm = 0.4619, lr_0 = 1.1052e-04
Loss = 7.8034e-02, PNorm = 85.8036, GNorm = 0.6304, lr_0 = 1.1044e-04
Loss = 6.8895e-02, PNorm = 85.8052, GNorm = 0.7311, lr_0 = 1.1036e-04
Loss = 7.6087e-02, PNorm = 85.8058, GNorm = 0.6218, lr_0 = 1.1029e-04
Loss = 8.0157e-02, PNorm = 85.8074, GNorm = 0.5140, lr_0 = 1.1021e-04
Loss = 8.1436e-02, PNorm = 85.8090, GNorm = 0.5982, lr_0 = 1.1014e-04
Loss = 9.5783e-02, PNorm = 85.8115, GNorm = 0.6814, lr_0 = 1.1006e-04
Loss = 6.8753e-02, PNorm = 85.8130, GNorm = 0.5362, lr_0 = 1.0999e-04
Loss = 7.6542e-02, PNorm = 85.8159, GNorm = 0.6017, lr_0 = 1.0991e-04
Loss = 7.0986e-02, PNorm = 85.8176, GNorm = 0.7544, lr_0 = 1.0984e-04
Loss = 8.2955e-02, PNorm = 85.8202, GNorm = 0.5992, lr_0 = 1.0976e-04
Loss = 7.6981e-02, PNorm = 85.8223, GNorm = 0.7122, lr_0 = 1.0969e-04
Loss = 7.1310e-02, PNorm = 85.8235, GNorm = 0.5221, lr_0 = 1.0961e-04
Loss = 8.6145e-02, PNorm = 85.8263, GNorm = 0.8404, lr_0 = 1.0954e-04
Loss = 7.2034e-02, PNorm = 85.8277, GNorm = 0.6596, lr_0 = 1.0946e-04
Loss = 7.5054e-02, PNorm = 85.8281, GNorm = 0.7518, lr_0 = 1.0939e-04
Loss = 6.6422e-02, PNorm = 85.8289, GNorm = 0.7167, lr_0 = 1.0931e-04
Loss = 7.3840e-02, PNorm = 85.8306, GNorm = 0.5207, lr_0 = 1.0924e-04
Loss = 7.9672e-02, PNorm = 85.8321, GNorm = 0.5817, lr_0 = 1.0916e-04
Loss = 7.7477e-02, PNorm = 85.8334, GNorm = 0.6070, lr_0 = 1.0909e-04
Loss = 7.4219e-02, PNorm = 85.8352, GNorm = 0.5905, lr_0 = 1.0901e-04
Loss = 7.6199e-02, PNorm = 85.8376, GNorm = 0.6823, lr_0 = 1.0894e-04
Loss = 8.6557e-02, PNorm = 85.8397, GNorm = 0.6623, lr_0 = 1.0886e-04
Loss = 7.6591e-02, PNorm = 85.8409, GNorm = 0.9064, lr_0 = 1.0879e-04
Loss = 8.4229e-02, PNorm = 85.8427, GNorm = 0.7335, lr_0 = 1.0871e-04
Loss = 7.2667e-02, PNorm = 85.8451, GNorm = 0.6098, lr_0 = 1.0864e-04
Loss = 6.5318e-02, PNorm = 85.8470, GNorm = 0.5925, lr_0 = 1.0856e-04
Validation mae = 0.226497
Epoch 29
Loss = 7.7330e-02, PNorm = 85.8492, GNorm = 0.8915, lr_0 = 1.0849e-04
Loss = 7.1721e-02, PNorm = 85.8486, GNorm = 0.5010, lr_0 = 1.0841e-04
Loss = 7.1509e-02, PNorm = 85.8497, GNorm = 0.8802, lr_0 = 1.0834e-04
Loss = 6.7035e-02, PNorm = 85.8525, GNorm = 0.5872, lr_0 = 1.0827e-04
Loss = 7.6238e-02, PNorm = 85.8537, GNorm = 0.5767, lr_0 = 1.0819e-04
Loss = 8.2152e-02, PNorm = 85.8549, GNorm = 0.5982, lr_0 = 1.0812e-04
Loss = 6.3836e-02, PNorm = 85.8559, GNorm = 0.6037, lr_0 = 1.0804e-04
Loss = 6.9241e-02, PNorm = 85.8578, GNorm = 0.6056, lr_0 = 1.0797e-04
Loss = 6.7787e-02, PNorm = 85.8591, GNorm = 0.7604, lr_0 = 1.0790e-04
Loss = 7.3556e-02, PNorm = 85.8607, GNorm = 0.7471, lr_0 = 1.0782e-04
Loss = 6.2795e-02, PNorm = 85.8624, GNorm = 0.4227, lr_0 = 1.0775e-04
Loss = 7.3592e-02, PNorm = 85.8634, GNorm = 0.6917, lr_0 = 1.0767e-04
Loss = 7.6403e-02, PNorm = 85.8647, GNorm = 0.6118, lr_0 = 1.0760e-04
Loss = 6.9028e-02, PNorm = 85.8660, GNorm = 0.6018, lr_0 = 1.0753e-04
Loss = 7.4801e-02, PNorm = 85.8676, GNorm = 0.5543, lr_0 = 1.0745e-04
Loss = 7.1333e-02, PNorm = 85.8693, GNorm = 0.5804, lr_0 = 1.0738e-04
Loss = 8.4443e-02, PNorm = 85.8713, GNorm = 0.5995, lr_0 = 1.0731e-04
Loss = 7.5764e-02, PNorm = 85.8732, GNorm = 0.5423, lr_0 = 1.0723e-04
Loss = 8.1224e-02, PNorm = 85.8750, GNorm = 0.6800, lr_0 = 1.0716e-04
Loss = 7.6675e-02, PNorm = 85.8760, GNorm = 0.6891, lr_0 = 1.0709e-04
Loss = 8.1509e-02, PNorm = 85.8764, GNorm = 0.5741, lr_0 = 1.0701e-04
Loss = 7.7685e-02, PNorm = 85.8785, GNorm = 0.5524, lr_0 = 1.0694e-04
Loss = 7.0838e-02, PNorm = 85.8805, GNorm = 0.7884, lr_0 = 1.0687e-04
Loss = 6.8784e-02, PNorm = 85.8810, GNorm = 0.5852, lr_0 = 1.0679e-04
Loss = 7.5728e-02, PNorm = 85.8824, GNorm = 0.5570, lr_0 = 1.0672e-04
Loss = 7.3730e-02, PNorm = 85.8831, GNorm = 0.6504, lr_0 = 1.0665e-04
Loss = 7.4198e-02, PNorm = 85.8829, GNorm = 0.7383, lr_0 = 1.0657e-04
Loss = 6.8938e-02, PNorm = 85.8828, GNorm = 0.8055, lr_0 = 1.0650e-04
Loss = 7.5269e-02, PNorm = 85.8836, GNorm = 0.7572, lr_0 = 1.0643e-04
Loss = 7.4012e-02, PNorm = 85.8837, GNorm = 0.7967, lr_0 = 1.0635e-04
Loss = 7.4960e-02, PNorm = 85.8836, GNorm = 0.6744, lr_0 = 1.0628e-04
Loss = 7.6082e-02, PNorm = 85.8863, GNorm = 0.6513, lr_0 = 1.0621e-04
Loss = 7.1764e-02, PNorm = 85.8888, GNorm = 0.5863, lr_0 = 1.0614e-04
Loss = 7.1928e-02, PNorm = 85.8910, GNorm = 0.8197, lr_0 = 1.0606e-04
Loss = 7.1829e-02, PNorm = 85.8909, GNorm = 0.6899, lr_0 = 1.0599e-04
Loss = 7.8322e-02, PNorm = 85.8920, GNorm = 0.5734, lr_0 = 1.0592e-04
Loss = 7.0152e-02, PNorm = 85.8943, GNorm = 0.4821, lr_0 = 1.0585e-04
Loss = 7.1995e-02, PNorm = 85.8961, GNorm = 0.5546, lr_0 = 1.0577e-04
Loss = 6.6245e-02, PNorm = 85.8983, GNorm = 0.6045, lr_0 = 1.0570e-04
Loss = 7.2672e-02, PNorm = 85.9006, GNorm = 0.5575, lr_0 = 1.0563e-04
Loss = 7.2835e-02, PNorm = 85.9023, GNorm = 0.9260, lr_0 = 1.0556e-04
Loss = 6.6162e-02, PNorm = 85.9047, GNorm = 0.7278, lr_0 = 1.0548e-04
Loss = 6.9872e-02, PNorm = 85.9064, GNorm = 0.5416, lr_0 = 1.0541e-04
Loss = 7.1514e-02, PNorm = 85.9068, GNorm = 0.6875, lr_0 = 1.0534e-04
Loss = 8.7535e-02, PNorm = 85.9065, GNorm = 0.8016, lr_0 = 1.0527e-04
Loss = 6.9522e-02, PNorm = 85.9079, GNorm = 0.5705, lr_0 = 1.0519e-04
Loss = 8.9367e-02, PNorm = 85.9099, GNorm = 0.7163, lr_0 = 1.0512e-04
Loss = 7.7000e-02, PNorm = 85.9105, GNorm = 0.6289, lr_0 = 1.0505e-04
Loss = 7.4657e-02, PNorm = 85.9118, GNorm = 0.5898, lr_0 = 1.0498e-04
Loss = 7.3291e-02, PNorm = 85.9128, GNorm = 0.5060, lr_0 = 1.0491e-04
Loss = 7.6138e-02, PNorm = 85.9142, GNorm = 0.4684, lr_0 = 1.0483e-04
Loss = 7.3250e-02, PNorm = 85.9145, GNorm = 0.5887, lr_0 = 1.0476e-04
Loss = 7.1150e-02, PNorm = 85.9159, GNorm = 0.7070, lr_0 = 1.0469e-04
Loss = 7.1018e-02, PNorm = 85.9182, GNorm = 0.6425, lr_0 = 1.0462e-04
Loss = 8.2232e-02, PNorm = 85.9196, GNorm = 0.5822, lr_0 = 1.0455e-04
Loss = 7.9845e-02, PNorm = 85.9201, GNorm = 0.5547, lr_0 = 1.0448e-04
Loss = 7.3220e-02, PNorm = 85.9219, GNorm = 0.4798, lr_0 = 1.0440e-04
Loss = 7.0963e-02, PNorm = 85.9230, GNorm = 0.4578, lr_0 = 1.0433e-04
Loss = 7.4477e-02, PNorm = 85.9236, GNorm = 0.8654, lr_0 = 1.0426e-04
Loss = 7.5241e-02, PNorm = 85.9247, GNorm = 0.5633, lr_0 = 1.0419e-04
Loss = 8.4208e-02, PNorm = 85.9260, GNorm = 0.8984, lr_0 = 1.0412e-04
Loss = 8.3610e-02, PNorm = 85.9273, GNorm = 0.7059, lr_0 = 1.0405e-04
Loss = 7.1354e-02, PNorm = 85.9299, GNorm = 0.4877, lr_0 = 1.0398e-04
Loss = 8.4220e-02, PNorm = 85.9325, GNorm = 0.5781, lr_0 = 1.0391e-04
Loss = 7.6364e-02, PNorm = 85.9339, GNorm = 0.5330, lr_0 = 1.0383e-04
Loss = 8.0812e-02, PNorm = 85.9362, GNorm = 0.6517, lr_0 = 1.0376e-04
Loss = 7.5377e-02, PNorm = 85.9384, GNorm = 0.8251, lr_0 = 1.0369e-04
Loss = 7.5202e-02, PNorm = 85.9404, GNorm = 0.5016, lr_0 = 1.0362e-04
Loss = 7.3042e-02, PNorm = 85.9422, GNorm = 0.5716, lr_0 = 1.0355e-04
Loss = 6.5602e-02, PNorm = 85.9424, GNorm = 0.5363, lr_0 = 1.0348e-04
Loss = 7.0702e-02, PNorm = 85.9428, GNorm = 0.5881, lr_0 = 1.0341e-04
Loss = 8.2742e-02, PNorm = 85.9442, GNorm = 0.9092, lr_0 = 1.0334e-04
Loss = 7.5233e-02, PNorm = 85.9436, GNorm = 0.4302, lr_0 = 1.0327e-04
Loss = 9.0342e-02, PNorm = 85.9443, GNorm = 0.6296, lr_0 = 1.0320e-04
Loss = 7.3790e-02, PNorm = 85.9472, GNorm = 0.7601, lr_0 = 1.0312e-04
Loss = 7.2570e-02, PNorm = 85.9489, GNorm = 0.4497, lr_0 = 1.0305e-04
Loss = 7.8377e-02, PNorm = 85.9494, GNorm = 0.6652, lr_0 = 1.0298e-04
Loss = 6.8535e-02, PNorm = 85.9508, GNorm = 0.5986, lr_0 = 1.0291e-04
Loss = 7.0274e-02, PNorm = 85.9534, GNorm = 0.6903, lr_0 = 1.0284e-04
Loss = 7.4208e-02, PNorm = 85.9539, GNorm = 0.6444, lr_0 = 1.0277e-04
Loss = 7.3868e-02, PNorm = 85.9550, GNorm = 0.5394, lr_0 = 1.0270e-04
Loss = 7.3031e-02, PNorm = 85.9563, GNorm = 0.6864, lr_0 = 1.0263e-04
Loss = 7.6621e-02, PNorm = 85.9602, GNorm = 0.4181, lr_0 = 1.0256e-04
Loss = 8.2977e-02, PNorm = 85.9633, GNorm = 0.5050, lr_0 = 1.0249e-04
Loss = 7.9042e-02, PNorm = 85.9649, GNorm = 0.6063, lr_0 = 1.0242e-04
Loss = 7.8198e-02, PNorm = 85.9664, GNorm = 0.6642, lr_0 = 1.0235e-04
Loss = 8.6574e-02, PNorm = 85.9677, GNorm = 0.9254, lr_0 = 1.0228e-04
Loss = 7.2705e-02, PNorm = 85.9697, GNorm = 0.7128, lr_0 = 1.0221e-04
Loss = 7.3745e-02, PNorm = 85.9711, GNorm = 0.4689, lr_0 = 1.0214e-04
Loss = 8.0492e-02, PNorm = 85.9715, GNorm = 0.5305, lr_0 = 1.0207e-04
Loss = 8.0727e-02, PNorm = 85.9724, GNorm = 0.5475, lr_0 = 1.0200e-04
Loss = 7.5743e-02, PNorm = 85.9735, GNorm = 0.5823, lr_0 = 1.0193e-04
Loss = 7.0212e-02, PNorm = 85.9748, GNorm = 0.7598, lr_0 = 1.0186e-04
Loss = 7.5844e-02, PNorm = 85.9763, GNorm = 0.7400, lr_0 = 1.0179e-04
Loss = 7.6724e-02, PNorm = 85.9763, GNorm = 0.7750, lr_0 = 1.0172e-04
Loss = 7.4841e-02, PNorm = 85.9775, GNorm = 0.6295, lr_0 = 1.0165e-04
Loss = 8.8338e-02, PNorm = 85.9792, GNorm = 0.5051, lr_0 = 1.0158e-04
Loss = 6.4968e-02, PNorm = 85.9804, GNorm = 0.5320, lr_0 = 1.0151e-04
Loss = 6.7261e-02, PNorm = 85.9820, GNorm = 0.8256, lr_0 = 1.0144e-04
Loss = 7.6233e-02, PNorm = 85.9825, GNorm = 0.6735, lr_0 = 1.0137e-04
Loss = 6.9265e-02, PNorm = 85.9835, GNorm = 0.4311, lr_0 = 1.0130e-04
Loss = 8.5621e-02, PNorm = 85.9855, GNorm = 0.6020, lr_0 = 1.0123e-04
Loss = 7.1032e-02, PNorm = 85.9873, GNorm = 0.6846, lr_0 = 1.0116e-04
Loss = 6.9760e-02, PNorm = 85.9880, GNorm = 0.8436, lr_0 = 1.0110e-04
Loss = 7.7043e-02, PNorm = 85.9899, GNorm = 0.8465, lr_0 = 1.0103e-04
Loss = 7.3655e-02, PNorm = 85.9917, GNorm = 0.7746, lr_0 = 1.0096e-04
Loss = 7.2588e-02, PNorm = 85.9918, GNorm = 0.6031, lr_0 = 1.0089e-04
Loss = 7.1105e-02, PNorm = 85.9924, GNorm = 0.6580, lr_0 = 1.0082e-04
Loss = 8.7920e-02, PNorm = 85.9929, GNorm = 0.7251, lr_0 = 1.0075e-04
Loss = 6.9697e-02, PNorm = 85.9954, GNorm = 0.7581, lr_0 = 1.0068e-04
Loss = 7.1958e-02, PNorm = 85.9966, GNorm = 0.7392, lr_0 = 1.0061e-04
Loss = 8.9747e-02, PNorm = 85.9986, GNorm = 0.6057, lr_0 = 1.0054e-04
Loss = 6.8594e-02, PNorm = 86.0009, GNorm = 0.5966, lr_0 = 1.0047e-04
Loss = 7.6123e-02, PNorm = 86.0027, GNorm = 0.6844, lr_0 = 1.0041e-04
Loss = 7.8755e-02, PNorm = 86.0037, GNorm = 0.5907, lr_0 = 1.0034e-04
Loss = 7.6704e-02, PNorm = 86.0055, GNorm = 0.4828, lr_0 = 1.0027e-04
Loss = 6.9229e-02, PNorm = 86.0072, GNorm = 0.5011, lr_0 = 1.0020e-04
Loss = 7.2252e-02, PNorm = 86.0085, GNorm = 0.5461, lr_0 = 1.0013e-04
Loss = 7.8510e-02, PNorm = 86.0105, GNorm = 0.5156, lr_0 = 1.0006e-04
Loss = 7.4221e-02, PNorm = 86.0123, GNorm = 0.4788, lr_0 = 1.0000e-04
Validation mae = 0.224903
Model 0 best validation mae = 0.224903 on epoch 29
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.225468
Ensemble test mae = 0.225468
Fold 1
Splitting data with seed 1
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 1.1199e+00, PNorm = 47.8606, GNorm = 3.8399, lr_0 = 1.0413e-04
Loss = 1.0459e+00, PNorm = 47.8616, GNorm = 2.3258, lr_0 = 1.0788e-04
Loss = 8.6265e-01, PNorm = 47.8637, GNorm = 2.7346, lr_0 = 1.1163e-04
Loss = 9.8441e-01, PNorm = 47.8662, GNorm = 3.7181, lr_0 = 1.1537e-04
Loss = 9.6954e-01, PNorm = 47.8684, GNorm = 6.0371, lr_0 = 1.1913e-04
Loss = 1.0473e+00, PNorm = 47.8710, GNorm = 2.0080, lr_0 = 1.2287e-04
Loss = 8.4199e-01, PNorm = 47.8753, GNorm = 2.3582, lr_0 = 1.2663e-04
Loss = 7.7174e-01, PNorm = 47.8813, GNorm = 2.9433, lr_0 = 1.3038e-04
Loss = 7.1796e-01, PNorm = 47.8893, GNorm = 2.0690, lr_0 = 1.3413e-04
Loss = 6.6093e-01, PNorm = 47.8971, GNorm = 7.8865, lr_0 = 1.3788e-04
Loss = 6.4660e-01, PNorm = 47.9045, GNorm = 3.0074, lr_0 = 1.4163e-04
Loss = 6.4282e-01, PNorm = 47.9104, GNorm = 18.7534, lr_0 = 1.4537e-04
Loss = 6.2656e-01, PNorm = 47.9143, GNorm = 17.8121, lr_0 = 1.4913e-04
Loss = 5.3420e-01, PNorm = 47.9204, GNorm = 10.0901, lr_0 = 1.5288e-04
Loss = 4.7993e-01, PNorm = 47.9264, GNorm = 6.6631, lr_0 = 1.5662e-04
Loss = 4.9870e-01, PNorm = 47.9311, GNorm = 6.8773, lr_0 = 1.6038e-04
Loss = 5.1517e-01, PNorm = 47.9356, GNorm = 12.1098, lr_0 = 1.6412e-04
Loss = 4.4965e-01, PNorm = 47.9432, GNorm = 4.9919, lr_0 = 1.6788e-04
Loss = 4.2342e-01, PNorm = 47.9491, GNorm = 21.4203, lr_0 = 1.7163e-04
Loss = 4.8487e-01, PNorm = 47.9540, GNorm = 10.8040, lr_0 = 1.7538e-04
Loss = 4.0624e-01, PNorm = 47.9610, GNorm = 5.9775, lr_0 = 1.7913e-04
Loss = 4.1295e-01, PNorm = 47.9664, GNorm = 29.2251, lr_0 = 1.8288e-04
Loss = 3.8759e-01, PNorm = 47.9698, GNorm = 18.4693, lr_0 = 1.8662e-04
Loss = 5.0095e-01, PNorm = 47.9742, GNorm = 15.6645, lr_0 = 1.9038e-04
Loss = 5.5591e-01, PNorm = 47.9803, GNorm = 18.9794, lr_0 = 1.9413e-04
Loss = 5.3901e-01, PNorm = 47.9870, GNorm = 9.9191, lr_0 = 1.9788e-04
Loss = 4.3364e-01, PNorm = 47.9952, GNorm = 4.3901, lr_0 = 2.0163e-04
Loss = 3.5753e-01, PNorm = 48.0029, GNorm = 3.3810, lr_0 = 2.0537e-04
Loss = 4.4222e-01, PNorm = 48.0068, GNorm = 1.6066, lr_0 = 2.0913e-04
Loss = 3.4777e-01, PNorm = 48.0108, GNorm = 1.5404, lr_0 = 2.1288e-04
Loss = 3.6154e-01, PNorm = 48.0155, GNorm = 5.8648, lr_0 = 2.1663e-04
Loss = 4.0592e-01, PNorm = 48.0191, GNorm = 6.4154, lr_0 = 2.2038e-04
Loss = 3.7902e-01, PNorm = 48.0245, GNorm = 10.9894, lr_0 = 2.2412e-04
Loss = 4.0611e-01, PNorm = 48.0298, GNorm = 7.2966, lr_0 = 2.2787e-04
Loss = 3.6962e-01, PNorm = 48.0366, GNorm = 1.5237, lr_0 = 2.3163e-04
Loss = 3.5869e-01, PNorm = 48.0402, GNorm = 4.2260, lr_0 = 2.3538e-04
Loss = 3.1097e-01, PNorm = 48.0435, GNorm = 8.0780, lr_0 = 2.3913e-04
Loss = 3.8886e-01, PNorm = 48.0496, GNorm = 2.2644, lr_0 = 2.4288e-04
Loss = 3.8149e-01, PNorm = 48.0541, GNorm = 13.1182, lr_0 = 2.4662e-04
Loss = 3.8746e-01, PNorm = 48.0611, GNorm = 11.2024, lr_0 = 2.5038e-04
Loss = 3.4527e-01, PNorm = 48.0677, GNorm = 3.7282, lr_0 = 2.5413e-04
Loss = 3.5948e-01, PNorm = 48.0719, GNorm = 7.0009, lr_0 = 2.5788e-04
Loss = 3.2165e-01, PNorm = 48.0779, GNorm = 4.2182, lr_0 = 2.6163e-04
Loss = 3.0115e-01, PNorm = 48.0823, GNorm = 8.0656, lr_0 = 2.6537e-04
Loss = 3.2546e-01, PNorm = 48.0886, GNorm = 7.7879, lr_0 = 2.6912e-04
Loss = 3.2805e-01, PNorm = 48.0961, GNorm = 9.9408, lr_0 = 2.7288e-04
Loss = 3.0711e-01, PNorm = 48.1009, GNorm = 7.8877, lr_0 = 2.7663e-04
Loss = 3.5809e-01, PNorm = 48.1093, GNorm = 2.4064, lr_0 = 2.8038e-04
Loss = 3.2128e-01, PNorm = 48.1149, GNorm = 3.1039, lr_0 = 2.8413e-04
Loss = 3.4060e-01, PNorm = 48.1220, GNorm = 1.6044, lr_0 = 2.8787e-04
Loss = 3.2977e-01, PNorm = 48.1279, GNorm = 3.2844, lr_0 = 2.9163e-04
Loss = 2.9398e-01, PNorm = 48.1341, GNorm = 3.3649, lr_0 = 2.9538e-04
Loss = 3.1891e-01, PNorm = 48.1376, GNorm = 4.4698, lr_0 = 2.9913e-04
Loss = 2.8827e-01, PNorm = 48.1440, GNorm = 6.8381, lr_0 = 3.0288e-04
Loss = 3.8851e-01, PNorm = 48.1502, GNorm = 2.2888, lr_0 = 3.0662e-04
Loss = 3.0493e-01, PNorm = 48.1601, GNorm = 3.8903, lr_0 = 3.1037e-04
Loss = 3.0534e-01, PNorm = 48.1661, GNorm = 3.1087, lr_0 = 3.1413e-04
Loss = 3.1927e-01, PNorm = 48.1731, GNorm = 1.5545, lr_0 = 3.1788e-04
Loss = 3.5622e-01, PNorm = 48.1780, GNorm = 4.1805, lr_0 = 3.2163e-04
Loss = 3.1225e-01, PNorm = 48.1811, GNorm = 1.8983, lr_0 = 3.2538e-04
Loss = 2.9171e-01, PNorm = 48.1896, GNorm = 2.5306, lr_0 = 3.2912e-04
Loss = 3.1426e-01, PNorm = 48.1935, GNorm = 3.6659, lr_0 = 3.3288e-04
Loss = 3.1818e-01, PNorm = 48.2026, GNorm = 4.8900, lr_0 = 3.3663e-04
Loss = 2.7393e-01, PNorm = 48.2088, GNorm = 6.2474, lr_0 = 3.4038e-04
Loss = 3.2227e-01, PNorm = 48.2142, GNorm = 2.4271, lr_0 = 3.4413e-04
Loss = 2.8978e-01, PNorm = 48.2217, GNorm = 7.0221, lr_0 = 3.4787e-04
Loss = 3.0403e-01, PNorm = 48.2273, GNorm = 6.3710, lr_0 = 3.5162e-04
Loss = 2.6297e-01, PNorm = 48.2347, GNorm = 4.8117, lr_0 = 3.5538e-04
Loss = 3.3187e-01, PNorm = 48.2410, GNorm = 13.6810, lr_0 = 3.5913e-04
Loss = 3.6197e-01, PNorm = 48.2483, GNorm = 7.5211, lr_0 = 3.6288e-04
Loss = 3.1976e-01, PNorm = 48.2597, GNorm = 8.2237, lr_0 = 3.6662e-04
Loss = 3.0989e-01, PNorm = 48.2706, GNorm = 4.3521, lr_0 = 3.7037e-04
Loss = 2.7971e-01, PNorm = 48.2789, GNorm = 1.7717, lr_0 = 3.7413e-04
Loss = 3.0497e-01, PNorm = 48.2843, GNorm = 1.2130, lr_0 = 3.7788e-04
Loss = 2.9607e-01, PNorm = 48.2922, GNorm = 2.4579, lr_0 = 3.8163e-04
Loss = 3.1563e-01, PNorm = 48.2981, GNorm = 3.4996, lr_0 = 3.8537e-04
Loss = 3.3876e-01, PNorm = 48.3079, GNorm = 7.3732, lr_0 = 3.8912e-04
Loss = 2.8849e-01, PNorm = 48.3168, GNorm = 5.2306, lr_0 = 3.9287e-04
Loss = 2.8915e-01, PNorm = 48.3227, GNorm = 2.0703, lr_0 = 3.9663e-04
Loss = 2.8096e-01, PNorm = 48.3313, GNorm = 1.7779, lr_0 = 4.0038e-04
Loss = 2.7760e-01, PNorm = 48.3423, GNorm = 1.5816, lr_0 = 4.0413e-04
Loss = 2.6938e-01, PNorm = 48.3519, GNorm = 5.4461, lr_0 = 4.0787e-04
Loss = 2.8528e-01, PNorm = 48.3583, GNorm = 2.2595, lr_0 = 4.1162e-04
Loss = 2.6932e-01, PNorm = 48.3655, GNorm = 1.7105, lr_0 = 4.1537e-04
Loss = 2.6354e-01, PNorm = 48.3747, GNorm = 2.7771, lr_0 = 4.1913e-04
Loss = 3.2707e-01, PNorm = 48.3780, GNorm = 4.7386, lr_0 = 4.2288e-04
Loss = 2.9043e-01, PNorm = 48.3885, GNorm = 3.1643, lr_0 = 4.2662e-04
Loss = 3.2501e-01, PNorm = 48.3970, GNorm = 7.7523, lr_0 = 4.3037e-04
Loss = 3.3534e-01, PNorm = 48.4049, GNorm = 5.1137, lr_0 = 4.3412e-04
Loss = 3.1576e-01, PNorm = 48.4207, GNorm = 8.5449, lr_0 = 4.3788e-04
Loss = 3.7751e-01, PNorm = 48.4340, GNorm = 1.4842, lr_0 = 4.4163e-04
Loss = 3.0411e-01, PNorm = 48.4452, GNorm = 2.1939, lr_0 = 4.4538e-04
Loss = 2.7813e-01, PNorm = 48.4585, GNorm = 4.0069, lr_0 = 4.4912e-04
Loss = 2.6896e-01, PNorm = 48.4676, GNorm = 1.2749, lr_0 = 4.5287e-04
Loss = 2.6304e-01, PNorm = 48.4767, GNorm = 4.8775, lr_0 = 4.5662e-04
Loss = 2.8470e-01, PNorm = 48.4850, GNorm = 2.6323, lr_0 = 4.6038e-04
Loss = 2.8848e-01, PNorm = 48.4870, GNorm = 2.3882, lr_0 = 4.6413e-04
Loss = 2.8852e-01, PNorm = 48.4969, GNorm = 4.0036, lr_0 = 4.6787e-04
Loss = 3.2055e-01, PNorm = 48.5074, GNorm = 4.3073, lr_0 = 4.7162e-04
Loss = 3.1391e-01, PNorm = 48.5182, GNorm = 2.9091, lr_0 = 4.7537e-04
Loss = 2.4743e-01, PNorm = 48.5291, GNorm = 3.8538, lr_0 = 4.7913e-04
Loss = 3.4655e-01, PNorm = 48.5359, GNorm = 7.3551, lr_0 = 4.8288e-04
Loss = 2.8609e-01, PNorm = 48.5469, GNorm = 3.1625, lr_0 = 4.8663e-04
Loss = 2.9009e-01, PNorm = 48.5555, GNorm = 11.0627, lr_0 = 4.9038e-04
Loss = 2.9792e-01, PNorm = 48.5651, GNorm = 3.0195, lr_0 = 4.9412e-04
Loss = 3.3948e-01, PNorm = 48.5714, GNorm = 8.9712, lr_0 = 4.9788e-04
Loss = 3.4442e-01, PNorm = 48.5902, GNorm = 1.6358, lr_0 = 5.0163e-04
Loss = 3.1412e-01, PNorm = 48.5963, GNorm = 6.2980, lr_0 = 5.0538e-04
Loss = 3.0971e-01, PNorm = 48.6111, GNorm = 2.7618, lr_0 = 5.0913e-04
Loss = 3.1071e-01, PNorm = 48.6221, GNorm = 7.9446, lr_0 = 5.1287e-04
Loss = 2.8996e-01, PNorm = 48.6311, GNorm = 2.3792, lr_0 = 5.1663e-04
Loss = 2.4735e-01, PNorm = 48.6430, GNorm = 3.5224, lr_0 = 5.2038e-04
Loss = 3.1671e-01, PNorm = 48.6552, GNorm = 4.7746, lr_0 = 5.2413e-04
Loss = 2.9930e-01, PNorm = 48.6654, GNorm = 1.3889, lr_0 = 5.2788e-04
Loss = 2.6371e-01, PNorm = 48.6749, GNorm = 1.5151, lr_0 = 5.3162e-04
Loss = 2.6708e-01, PNorm = 48.6837, GNorm = 2.1388, lr_0 = 5.3538e-04
Loss = 2.7776e-01, PNorm = 48.6918, GNorm = 5.6028, lr_0 = 5.3912e-04
Loss = 3.1254e-01, PNorm = 48.7066, GNorm = 7.4228, lr_0 = 5.4288e-04
Loss = 3.0171e-01, PNorm = 48.7181, GNorm = 7.0168, lr_0 = 5.4663e-04
Loss = 3.3809e-01, PNorm = 48.7312, GNorm = 2.9255, lr_0 = 5.5038e-04
Validation mae = 0.321229
Epoch 1
Loss = 2.5457e-01, PNorm = 48.7447, GNorm = 2.2306, lr_0 = 5.5413e-04
Loss = 2.8383e-01, PNorm = 48.7580, GNorm = 4.6857, lr_0 = 5.5787e-04
Loss = 2.7324e-01, PNorm = 48.7719, GNorm = 2.6818, lr_0 = 5.6163e-04
Loss = 2.5119e-01, PNorm = 48.7806, GNorm = 3.6681, lr_0 = 5.6538e-04
Loss = 2.6487e-01, PNorm = 48.7951, GNorm = 3.7783, lr_0 = 5.6913e-04
Loss = 2.9854e-01, PNorm = 48.8048, GNorm = 3.2912, lr_0 = 5.7288e-04
Loss = 2.7434e-01, PNorm = 48.8172, GNorm = 1.0535, lr_0 = 5.7662e-04
Loss = 2.8004e-01, PNorm = 48.8271, GNorm = 3.3154, lr_0 = 5.8038e-04
Loss = 2.4939e-01, PNorm = 48.8405, GNorm = 2.0849, lr_0 = 5.8413e-04
Loss = 2.7847e-01, PNorm = 48.8494, GNorm = 2.8641, lr_0 = 5.8788e-04
Loss = 2.4562e-01, PNorm = 48.8636, GNorm = 7.7645, lr_0 = 5.9163e-04
Loss = 2.3923e-01, PNorm = 48.8749, GNorm = 3.2063, lr_0 = 5.9538e-04
Loss = 2.8019e-01, PNorm = 48.8862, GNorm = 3.0616, lr_0 = 5.9913e-04
Loss = 2.8749e-01, PNorm = 48.9020, GNorm = 1.8476, lr_0 = 6.0288e-04
Loss = 3.1495e-01, PNorm = 48.9154, GNorm = 8.2935, lr_0 = 6.0663e-04
Loss = 3.0577e-01, PNorm = 48.9302, GNorm = 5.9887, lr_0 = 6.1038e-04
Loss = 2.8325e-01, PNorm = 48.9518, GNorm = 2.8029, lr_0 = 6.1413e-04
Loss = 2.7697e-01, PNorm = 48.9722, GNorm = 2.4036, lr_0 = 6.1788e-04
Loss = 3.0290e-01, PNorm = 48.9881, GNorm = 8.5328, lr_0 = 6.2163e-04
Loss = 3.4589e-01, PNorm = 49.0056, GNorm = 1.3338, lr_0 = 6.2538e-04
Loss = 3.0082e-01, PNorm = 49.0239, GNorm = 5.1378, lr_0 = 6.2913e-04
Loss = 2.7343e-01, PNorm = 49.0401, GNorm = 2.2997, lr_0 = 6.3288e-04
Loss = 2.2810e-01, PNorm = 49.0563, GNorm = 1.0682, lr_0 = 6.3663e-04
Loss = 2.7639e-01, PNorm = 49.0625, GNorm = 1.3798, lr_0 = 6.4038e-04
Loss = 2.7220e-01, PNorm = 49.0757, GNorm = 2.0927, lr_0 = 6.4413e-04
Loss = 2.7546e-01, PNorm = 49.0943, GNorm = 4.1084, lr_0 = 6.4788e-04
Loss = 3.0294e-01, PNorm = 49.1170, GNorm = 5.0799, lr_0 = 6.5163e-04
Loss = 2.7787e-01, PNorm = 49.1270, GNorm = 1.8627, lr_0 = 6.5538e-04
Loss = 2.6954e-01, PNorm = 49.1397, GNorm = 1.2311, lr_0 = 6.5913e-04
Loss = 2.5758e-01, PNorm = 49.1540, GNorm = 1.8882, lr_0 = 6.6288e-04
Loss = 3.0436e-01, PNorm = 49.1630, GNorm = 3.4265, lr_0 = 6.6663e-04
Loss = 2.8125e-01, PNorm = 49.1823, GNorm = 1.2033, lr_0 = 6.7038e-04
Loss = 2.5757e-01, PNorm = 49.1901, GNorm = 2.5553, lr_0 = 6.7413e-04
Loss = 2.9996e-01, PNorm = 49.2058, GNorm = 4.5518, lr_0 = 6.7788e-04
Loss = 2.6712e-01, PNorm = 49.2228, GNorm = 4.8201, lr_0 = 6.8163e-04
Loss = 2.8404e-01, PNorm = 49.2392, GNorm = 2.9926, lr_0 = 6.8538e-04
Loss = 2.4783e-01, PNorm = 49.2557, GNorm = 1.5105, lr_0 = 6.8913e-04
Loss = 2.7703e-01, PNorm = 49.2683, GNorm = 3.2571, lr_0 = 6.9288e-04
Loss = 2.6642e-01, PNorm = 49.2847, GNorm = 1.4106, lr_0 = 6.9663e-04
Loss = 2.7003e-01, PNorm = 49.3041, GNorm = 2.3888, lr_0 = 7.0038e-04
Loss = 2.5737e-01, PNorm = 49.3169, GNorm = 1.7728, lr_0 = 7.0413e-04
Loss = 2.9701e-01, PNorm = 49.3300, GNorm = 3.2634, lr_0 = 7.0788e-04
Loss = 2.9577e-01, PNorm = 49.3485, GNorm = 2.6184, lr_0 = 7.1163e-04
Loss = 2.6975e-01, PNorm = 49.3719, GNorm = 1.0491, lr_0 = 7.1538e-04
Loss = 2.4915e-01, PNorm = 49.3893, GNorm = 0.7787, lr_0 = 7.1913e-04
Loss = 2.7495e-01, PNorm = 49.4077, GNorm = 2.2547, lr_0 = 7.2288e-04
Loss = 2.3643e-01, PNorm = 49.4284, GNorm = 1.4623, lr_0 = 7.2663e-04
Loss = 2.5894e-01, PNorm = 49.4368, GNorm = 3.4409, lr_0 = 7.3038e-04
Loss = 3.0675e-01, PNorm = 49.4525, GNorm = 6.4207, lr_0 = 7.3413e-04
Loss = 3.0501e-01, PNorm = 49.4806, GNorm = 1.5066, lr_0 = 7.3788e-04
Loss = 2.6403e-01, PNorm = 49.5029, GNorm = 2.0856, lr_0 = 7.4163e-04
Loss = 2.9282e-01, PNorm = 49.5238, GNorm = 2.6907, lr_0 = 7.4538e-04
Loss = 3.0382e-01, PNorm = 49.5472, GNorm = 2.4621, lr_0 = 7.4913e-04
Loss = 2.8377e-01, PNorm = 49.5824, GNorm = 4.1443, lr_0 = 7.5288e-04
Loss = 3.1583e-01, PNorm = 49.6075, GNorm = 1.9483, lr_0 = 7.5663e-04
Loss = 2.8727e-01, PNorm = 49.6310, GNorm = 5.9480, lr_0 = 7.6038e-04
Loss = 2.9439e-01, PNorm = 49.6599, GNorm = 2.2557, lr_0 = 7.6413e-04
Loss = 2.5026e-01, PNorm = 49.6737, GNorm = 2.0808, lr_0 = 7.6788e-04
Loss = 2.7981e-01, PNorm = 49.6883, GNorm = 2.2627, lr_0 = 7.7163e-04
Loss = 2.3387e-01, PNorm = 49.6943, GNorm = 1.5752, lr_0 = 7.7538e-04
Loss = 3.0021e-01, PNorm = 49.7105, GNorm = 3.6567, lr_0 = 7.7913e-04
Loss = 2.7275e-01, PNorm = 49.7264, GNorm = 1.2965, lr_0 = 7.8288e-04
Loss = 2.7555e-01, PNorm = 49.7434, GNorm = 5.5371, lr_0 = 7.8663e-04
Loss = 2.6502e-01, PNorm = 49.7569, GNorm = 3.7906, lr_0 = 7.9038e-04
Loss = 2.8563e-01, PNorm = 49.7755, GNorm = 2.2079, lr_0 = 7.9413e-04
Loss = 2.8591e-01, PNorm = 49.8009, GNorm = 3.2122, lr_0 = 7.9788e-04
Loss = 2.2386e-01, PNorm = 49.8206, GNorm = 3.5071, lr_0 = 8.0163e-04
Loss = 2.2581e-01, PNorm = 49.8411, GNorm = 1.1778, lr_0 = 8.0538e-04
Loss = 2.0314e-01, PNorm = 49.8546, GNorm = 0.9453, lr_0 = 8.0913e-04
Loss = 2.3892e-01, PNorm = 49.8784, GNorm = 2.7167, lr_0 = 8.1288e-04
Loss = 2.5113e-01, PNorm = 49.8923, GNorm = 1.6514, lr_0 = 8.1663e-04
Loss = 2.5272e-01, PNorm = 49.9070, GNorm = 3.5802, lr_0 = 8.2038e-04
Loss = 2.8894e-01, PNorm = 49.9263, GNorm = 1.7623, lr_0 = 8.2413e-04
Loss = 2.2293e-01, PNorm = 49.9496, GNorm = 4.0140, lr_0 = 8.2788e-04
Loss = 2.3611e-01, PNorm = 49.9673, GNorm = 2.0151, lr_0 = 8.3163e-04
Loss = 2.3952e-01, PNorm = 49.9834, GNorm = 0.8949, lr_0 = 8.3538e-04
Loss = 2.3570e-01, PNorm = 50.0050, GNorm = 1.4286, lr_0 = 8.3913e-04
Loss = 2.9439e-01, PNorm = 50.0271, GNorm = 2.8310, lr_0 = 8.4288e-04
Loss = 2.9348e-01, PNorm = 50.0587, GNorm = 2.1695, lr_0 = 8.4663e-04
Loss = 2.9741e-01, PNorm = 50.0837, GNorm = 4.5275, lr_0 = 8.5038e-04
Loss = 2.4364e-01, PNorm = 50.1149, GNorm = 0.8072, lr_0 = 8.5413e-04
Loss = 2.2344e-01, PNorm = 50.1358, GNorm = 1.3783, lr_0 = 8.5788e-04
Loss = 2.3684e-01, PNorm = 50.1590, GNorm = 1.4514, lr_0 = 8.6163e-04
Loss = 2.2885e-01, PNorm = 50.1716, GNorm = 2.2654, lr_0 = 8.6538e-04
Loss = 2.7772e-01, PNorm = 50.1964, GNorm = 5.3004, lr_0 = 8.6913e-04
Loss = 2.5791e-01, PNorm = 50.2194, GNorm = 2.9918, lr_0 = 8.7288e-04
Loss = 2.5719e-01, PNorm = 50.2428, GNorm = 1.1643, lr_0 = 8.7663e-04
Loss = 2.7614e-01, PNorm = 50.2548, GNorm = 1.4459, lr_0 = 8.8038e-04
Loss = 2.5044e-01, PNorm = 50.2739, GNorm = 3.4094, lr_0 = 8.8413e-04
Loss = 2.5383e-01, PNorm = 50.2936, GNorm = 2.4459, lr_0 = 8.8788e-04
Loss = 2.9216e-01, PNorm = 50.3168, GNorm = 3.4759, lr_0 = 8.9163e-04
Loss = 2.5121e-01, PNorm = 50.3364, GNorm = 2.3879, lr_0 = 8.9538e-04
Loss = 2.3051e-01, PNorm = 50.3556, GNorm = 2.2707, lr_0 = 8.9913e-04
Loss = 2.2794e-01, PNorm = 50.3756, GNorm = 3.7723, lr_0 = 9.0288e-04
Loss = 2.6511e-01, PNorm = 50.3894, GNorm = 1.5107, lr_0 = 9.0663e-04
Loss = 2.3524e-01, PNorm = 50.4098, GNorm = 1.2120, lr_0 = 9.1038e-04
Loss = 2.3439e-01, PNorm = 50.4276, GNorm = 2.7539, lr_0 = 9.1413e-04
Loss = 3.0165e-01, PNorm = 50.4516, GNorm = 1.3461, lr_0 = 9.1788e-04
Loss = 2.5321e-01, PNorm = 50.4825, GNorm = 0.9349, lr_0 = 9.2163e-04
Loss = 2.2889e-01, PNorm = 50.5047, GNorm = 2.2147, lr_0 = 9.2538e-04
Loss = 2.6940e-01, PNorm = 50.5260, GNorm = 1.5920, lr_0 = 9.2913e-04
Loss = 2.4895e-01, PNorm = 50.5538, GNorm = 1.1412, lr_0 = 9.3288e-04
Loss = 2.4145e-01, PNorm = 50.5804, GNorm = 0.8010, lr_0 = 9.3663e-04
Loss = 2.1979e-01, PNorm = 50.6078, GNorm = 4.7546, lr_0 = 9.4038e-04
Loss = 2.7691e-01, PNorm = 50.6410, GNorm = 5.6593, lr_0 = 9.4413e-04
Loss = 2.5298e-01, PNorm = 50.6795, GNorm = 1.0675, lr_0 = 9.4788e-04
Loss = 2.3611e-01, PNorm = 50.7102, GNorm = 1.8882, lr_0 = 9.5163e-04
Loss = 2.7812e-01, PNorm = 50.7375, GNorm = 1.3249, lr_0 = 9.5538e-04
Loss = 2.7164e-01, PNorm = 50.7599, GNorm = 2.8019, lr_0 = 9.5913e-04
Loss = 2.2765e-01, PNorm = 50.7801, GNorm = 4.0154, lr_0 = 9.6288e-04
Loss = 2.4284e-01, PNorm = 50.8047, GNorm = 2.2168, lr_0 = 9.6663e-04
Loss = 2.4965e-01, PNorm = 50.8355, GNorm = 2.5874, lr_0 = 9.7038e-04
Loss = 2.5870e-01, PNorm = 50.8653, GNorm = 1.6926, lr_0 = 9.7413e-04
Loss = 2.6862e-01, PNorm = 50.8806, GNorm = 2.0847, lr_0 = 9.7788e-04
Loss = 2.3198e-01, PNorm = 50.9042, GNorm = 1.7430, lr_0 = 9.8163e-04
Loss = 2.7419e-01, PNorm = 50.9308, GNorm = 1.0490, lr_0 = 9.8537e-04
Loss = 2.4218e-01, PNorm = 50.9622, GNorm = 0.9353, lr_0 = 9.8912e-04
Loss = 2.7587e-01, PNorm = 50.9948, GNorm = 1.8410, lr_0 = 9.9288e-04
Loss = 2.3697e-01, PNorm = 51.0170, GNorm = 2.3312, lr_0 = 9.9663e-04
Loss = 2.3198e-01, PNorm = 51.0385, GNorm = 0.7447, lr_0 = 9.9993e-04
Validation mae = 0.286012
Epoch 2
Loss = 2.3353e-01, PNorm = 51.0606, GNorm = 1.3571, lr_0 = 9.9925e-04
Loss = 2.3657e-01, PNorm = 51.0839, GNorm = 1.4415, lr_0 = 9.9856e-04
Loss = 2.3989e-01, PNorm = 51.1111, GNorm = 1.3520, lr_0 = 9.9788e-04
Loss = 2.2960e-01, PNorm = 51.1385, GNorm = 1.5279, lr_0 = 9.9719e-04
Loss = 2.7282e-01, PNorm = 51.1594, GNorm = 3.4727, lr_0 = 9.9651e-04
Loss = 2.6648e-01, PNorm = 51.1947, GNorm = 2.0834, lr_0 = 9.9583e-04
Loss = 2.3663e-01, PNorm = 51.2360, GNorm = 1.5906, lr_0 = 9.9515e-04
Loss = 2.4216e-01, PNorm = 51.2666, GNorm = 0.6048, lr_0 = 9.9446e-04
Loss = 2.2385e-01, PNorm = 51.2883, GNorm = 2.2068, lr_0 = 9.9378e-04
Loss = 2.4602e-01, PNorm = 51.3128, GNorm = 1.1746, lr_0 = 9.9310e-04
Loss = 2.2840e-01, PNorm = 51.3382, GNorm = 1.9831, lr_0 = 9.9242e-04
Loss = 2.2862e-01, PNorm = 51.3709, GNorm = 0.8659, lr_0 = 9.9174e-04
Loss = 2.5686e-01, PNorm = 51.4015, GNorm = 1.0890, lr_0 = 9.9106e-04
Loss = 2.1938e-01, PNorm = 51.4236, GNorm = 2.4030, lr_0 = 9.9038e-04
Loss = 2.5701e-01, PNorm = 51.4458, GNorm = 3.8623, lr_0 = 9.8971e-04
Loss = 2.3846e-01, PNorm = 51.4745, GNorm = 2.4629, lr_0 = 9.8903e-04
Loss = 2.2404e-01, PNorm = 51.5100, GNorm = 1.3970, lr_0 = 9.8835e-04
Loss = 2.5245e-01, PNorm = 51.5405, GNorm = 2.6228, lr_0 = 9.8767e-04
Loss = 2.2815e-01, PNorm = 51.5629, GNorm = 0.8009, lr_0 = 9.8700e-04
Loss = 2.1957e-01, PNorm = 51.5786, GNorm = 0.8699, lr_0 = 9.8632e-04
Loss = 2.4972e-01, PNorm = 51.5963, GNorm = 3.2389, lr_0 = 9.8564e-04
Loss = 2.5923e-01, PNorm = 51.6257, GNorm = 1.7471, lr_0 = 9.8497e-04
Loss = 2.4169e-01, PNorm = 51.6585, GNorm = 5.0850, lr_0 = 9.8429e-04
Loss = 2.8568e-01, PNorm = 51.6942, GNorm = 1.8515, lr_0 = 9.8362e-04
Loss = 2.2916e-01, PNorm = 51.7310, GNorm = 1.5355, lr_0 = 9.8295e-04
Loss = 2.3155e-01, PNorm = 51.7579, GNorm = 0.7658, lr_0 = 9.8227e-04
Loss = 2.1732e-01, PNorm = 51.7843, GNorm = 2.1505, lr_0 = 9.8160e-04
Loss = 2.4828e-01, PNorm = 51.8082, GNorm = 1.1977, lr_0 = 9.8093e-04
Loss = 2.3516e-01, PNorm = 51.8227, GNorm = 2.0474, lr_0 = 9.8026e-04
Loss = 2.5448e-01, PNorm = 51.8580, GNorm = 0.8608, lr_0 = 9.7958e-04
Loss = 2.2638e-01, PNorm = 51.8828, GNorm = 1.0351, lr_0 = 9.7891e-04
Loss = 2.1822e-01, PNorm = 51.9081, GNorm = 1.3593, lr_0 = 9.7824e-04
Loss = 2.3068e-01, PNorm = 51.9340, GNorm = 1.0834, lr_0 = 9.7757e-04
Loss = 2.4888e-01, PNorm = 51.9582, GNorm = 0.7791, lr_0 = 9.7690e-04
Loss = 1.9685e-01, PNorm = 51.9811, GNorm = 1.1890, lr_0 = 9.7623e-04
Loss = 2.7711e-01, PNorm = 52.0022, GNorm = 2.2439, lr_0 = 9.7556e-04
Loss = 2.2759e-01, PNorm = 52.0303, GNorm = 0.9358, lr_0 = 9.7490e-04
Loss = 2.5201e-01, PNorm = 52.0665, GNorm = 1.0777, lr_0 = 9.7423e-04
Loss = 2.0420e-01, PNorm = 52.0931, GNorm = 1.7959, lr_0 = 9.7356e-04
Loss = 2.6918e-01, PNorm = 52.1099, GNorm = 1.7761, lr_0 = 9.7289e-04
Loss = 2.2488e-01, PNorm = 52.1449, GNorm = 0.7711, lr_0 = 9.7223e-04
Loss = 2.4751e-01, PNorm = 52.1639, GNorm = 1.3085, lr_0 = 9.7156e-04
Loss = 2.4202e-01, PNorm = 52.1925, GNorm = 1.6865, lr_0 = 9.7090e-04
Loss = 2.6493e-01, PNorm = 52.2093, GNorm = 1.4554, lr_0 = 9.7023e-04
Loss = 3.0348e-01, PNorm = 52.2447, GNorm = 4.3840, lr_0 = 9.6957e-04
Loss = 2.6279e-01, PNorm = 52.2788, GNorm = 1.4650, lr_0 = 9.6890e-04
Loss = 2.6647e-01, PNorm = 52.3182, GNorm = 0.6662, lr_0 = 9.6824e-04
Loss = 2.2240e-01, PNorm = 52.3503, GNorm = 1.2376, lr_0 = 9.6757e-04
Loss = 2.1085e-01, PNorm = 52.3741, GNorm = 1.6555, lr_0 = 9.6691e-04
Loss = 1.9746e-01, PNorm = 52.3958, GNorm = 0.9132, lr_0 = 9.6625e-04
Loss = 2.3899e-01, PNorm = 52.4131, GNorm = 1.6961, lr_0 = 9.6559e-04
Loss = 2.4184e-01, PNorm = 52.4415, GNorm = 4.1053, lr_0 = 9.6493e-04
Loss = 2.9116e-01, PNorm = 52.4725, GNorm = 2.4637, lr_0 = 9.6427e-04
Loss = 2.7449e-01, PNorm = 52.5086, GNorm = 2.3082, lr_0 = 9.6360e-04
Loss = 2.7370e-01, PNorm = 52.5354, GNorm = 1.0892, lr_0 = 9.6294e-04
Loss = 2.1667e-01, PNorm = 52.5569, GNorm = 1.3943, lr_0 = 9.6228e-04
Loss = 2.3083e-01, PNorm = 52.5773, GNorm = 0.8535, lr_0 = 9.6163e-04
Loss = 2.0155e-01, PNorm = 52.6004, GNorm = 1.3255, lr_0 = 9.6097e-04
Loss = 2.2124e-01, PNorm = 52.6145, GNorm = 0.9582, lr_0 = 9.6031e-04
Loss = 2.0458e-01, PNorm = 52.6423, GNorm = 2.8581, lr_0 = 9.5965e-04
Loss = 2.1489e-01, PNorm = 52.6607, GNorm = 0.8592, lr_0 = 9.5899e-04
Loss = 2.3295e-01, PNorm = 52.6750, GNorm = 3.2975, lr_0 = 9.5834e-04
Loss = 1.8859e-01, PNorm = 52.6993, GNorm = 0.6783, lr_0 = 9.5768e-04
Loss = 2.2673e-01, PNorm = 52.7144, GNorm = 2.0658, lr_0 = 9.5702e-04
Loss = 2.3292e-01, PNorm = 52.7438, GNorm = 2.7701, lr_0 = 9.5637e-04
Loss = 2.1797e-01, PNorm = 52.7727, GNorm = 1.1225, lr_0 = 9.5571e-04
Loss = 1.8138e-01, PNorm = 52.7949, GNorm = 0.8705, lr_0 = 9.5506e-04
Loss = 2.2451e-01, PNorm = 52.8057, GNorm = 0.6451, lr_0 = 9.5440e-04
Loss = 2.7635e-01, PNorm = 52.8320, GNorm = 4.0020, lr_0 = 9.5375e-04
Loss = 2.2456e-01, PNorm = 52.8521, GNorm = 1.4255, lr_0 = 9.5310e-04
Loss = 2.3388e-01, PNorm = 52.8784, GNorm = 0.7692, lr_0 = 9.5244e-04
Loss = 2.3044e-01, PNorm = 52.9055, GNorm = 2.0329, lr_0 = 9.5179e-04
Loss = 2.2829e-01, PNorm = 52.9369, GNorm = 0.7675, lr_0 = 9.5114e-04
Loss = 1.9295e-01, PNorm = 52.9611, GNorm = 1.4917, lr_0 = 9.5049e-04
Loss = 2.2452e-01, PNorm = 52.9770, GNorm = 1.0047, lr_0 = 9.4984e-04
Loss = 2.0156e-01, PNorm = 52.9901, GNorm = 1.0127, lr_0 = 9.4919e-04
Loss = 2.3984e-01, PNorm = 53.0116, GNorm = 1.3030, lr_0 = 9.4854e-04
Loss = 2.6929e-01, PNorm = 53.0512, GNorm = 0.8201, lr_0 = 9.4789e-04
Loss = 1.9417e-01, PNorm = 53.0756, GNorm = 0.8242, lr_0 = 9.4724e-04
Loss = 2.3147e-01, PNorm = 53.0966, GNorm = 1.2487, lr_0 = 9.4659e-04
Loss = 2.1544e-01, PNorm = 53.1223, GNorm = 1.3932, lr_0 = 9.4594e-04
Loss = 2.3205e-01, PNorm = 53.1431, GNorm = 2.0104, lr_0 = 9.4529e-04
Loss = 2.3899e-01, PNorm = 53.1723, GNorm = 0.8931, lr_0 = 9.4464e-04
Loss = 2.2107e-01, PNorm = 53.1959, GNorm = 0.7763, lr_0 = 9.4400e-04
Loss = 2.2828e-01, PNorm = 53.2218, GNorm = 0.9648, lr_0 = 9.4335e-04
Loss = 2.2167e-01, PNorm = 53.2417, GNorm = 0.6206, lr_0 = 9.4270e-04
Loss = 2.6599e-01, PNorm = 53.2789, GNorm = 1.3326, lr_0 = 9.4206e-04
Loss = 2.4052e-01, PNorm = 53.3117, GNorm = 1.9702, lr_0 = 9.4141e-04
Loss = 2.2744e-01, PNorm = 53.3456, GNorm = 0.9224, lr_0 = 9.4077e-04
Loss = 1.8279e-01, PNorm = 53.3782, GNorm = 0.7289, lr_0 = 9.4012e-04
Loss = 2.2280e-01, PNorm = 53.3973, GNorm = 0.7823, lr_0 = 9.3948e-04
Loss = 2.1876e-01, PNorm = 53.4191, GNorm = 2.5043, lr_0 = 9.3884e-04
Loss = 1.9621e-01, PNorm = 53.4288, GNorm = 1.3493, lr_0 = 9.3819e-04
Loss = 2.2746e-01, PNorm = 53.4548, GNorm = 0.9697, lr_0 = 9.3755e-04
Loss = 2.3449e-01, PNorm = 53.4864, GNorm = 1.1488, lr_0 = 9.3691e-04
Loss = 1.8387e-01, PNorm = 53.5083, GNorm = 1.1792, lr_0 = 9.3627e-04
Loss = 1.8268e-01, PNorm = 53.5299, GNorm = 0.9936, lr_0 = 9.3562e-04
Loss = 2.1707e-01, PNorm = 53.5497, GNorm = 1.1702, lr_0 = 9.3498e-04
Loss = 2.4023e-01, PNorm = 53.5796, GNorm = 2.4868, lr_0 = 9.3434e-04
Loss = 2.0413e-01, PNorm = 53.6082, GNorm = 0.8429, lr_0 = 9.3370e-04
Loss = 2.0371e-01, PNorm = 53.6323, GNorm = 0.8985, lr_0 = 9.3306e-04
Loss = 2.0786e-01, PNorm = 53.6560, GNorm = 1.1198, lr_0 = 9.3242e-04
Loss = 2.2234e-01, PNorm = 53.6741, GNorm = 2.3288, lr_0 = 9.3178e-04
Loss = 2.1352e-01, PNorm = 53.7035, GNorm = 1.3626, lr_0 = 9.3115e-04
Loss = 2.0744e-01, PNorm = 53.7364, GNorm = 1.8833, lr_0 = 9.3051e-04
Loss = 2.3557e-01, PNorm = 53.7640, GNorm = 1.4661, lr_0 = 9.2987e-04
Loss = 2.3219e-01, PNorm = 53.7950, GNorm = 1.6551, lr_0 = 9.2923e-04
Loss = 1.9475e-01, PNorm = 53.8202, GNorm = 1.5945, lr_0 = 9.2860e-04
Loss = 2.5859e-01, PNorm = 53.8369, GNorm = 1.0611, lr_0 = 9.2796e-04
Loss = 2.0993e-01, PNorm = 53.8562, GNorm = 2.1181, lr_0 = 9.2733e-04
Loss = 2.0466e-01, PNorm = 53.8776, GNorm = 3.0358, lr_0 = 9.2669e-04
Loss = 2.0538e-01, PNorm = 53.8987, GNorm = 0.8334, lr_0 = 9.2606e-04
Loss = 2.3492e-01, PNorm = 53.9276, GNorm = 2.5802, lr_0 = 9.2542e-04
Loss = 2.0316e-01, PNorm = 53.9521, GNorm = 1.6086, lr_0 = 9.2479e-04
Loss = 1.9810e-01, PNorm = 53.9859, GNorm = 0.6062, lr_0 = 9.2415e-04
Loss = 1.9212e-01, PNorm = 54.0160, GNorm = 0.9606, lr_0 = 9.2352e-04
Loss = 2.1991e-01, PNorm = 54.0411, GNorm = 2.4755, lr_0 = 9.2289e-04
Loss = 2.4388e-01, PNorm = 54.0749, GNorm = 0.6313, lr_0 = 9.2226e-04
Loss = 2.2409e-01, PNorm = 54.1052, GNorm = 1.2867, lr_0 = 9.2162e-04
Loss = 2.1023e-01, PNorm = 54.1379, GNorm = 2.4421, lr_0 = 9.2099e-04
Validation mae = 0.329590
Epoch 3
Loss = 2.1881e-01, PNorm = 54.1608, GNorm = 2.6532, lr_0 = 9.2036e-04
Loss = 2.1125e-01, PNorm = 54.1960, GNorm = 0.6992, lr_0 = 9.1973e-04
Loss = 1.9312e-01, PNorm = 54.2208, GNorm = 1.3284, lr_0 = 9.1910e-04
Loss = 2.1625e-01, PNorm = 54.2494, GNorm = 0.8051, lr_0 = 9.1847e-04
Loss = 2.0082e-01, PNorm = 54.2756, GNorm = 1.2786, lr_0 = 9.1784e-04
Loss = 2.0073e-01, PNorm = 54.3048, GNorm = 0.9736, lr_0 = 9.1721e-04
Loss = 2.4820e-01, PNorm = 54.3262, GNorm = 1.3534, lr_0 = 9.1658e-04
Loss = 2.0453e-01, PNorm = 54.3557, GNorm = 1.9814, lr_0 = 9.1596e-04
Loss = 2.2442e-01, PNorm = 54.3837, GNorm = 1.3404, lr_0 = 9.1533e-04
Loss = 2.1601e-01, PNorm = 54.4250, GNorm = 0.9815, lr_0 = 9.1470e-04
Loss = 1.9535e-01, PNorm = 54.4559, GNorm = 0.6483, lr_0 = 9.1408e-04
Loss = 2.1461e-01, PNorm = 54.4786, GNorm = 1.1622, lr_0 = 9.1345e-04
Loss = 2.2978e-01, PNorm = 54.5022, GNorm = 0.8563, lr_0 = 9.1282e-04
Loss = 2.1116e-01, PNorm = 54.5309, GNorm = 2.4458, lr_0 = 9.1220e-04
Loss = 2.2449e-01, PNorm = 54.5575, GNorm = 0.6026, lr_0 = 9.1157e-04
Loss = 1.9135e-01, PNorm = 54.5858, GNorm = 1.9535, lr_0 = 9.1095e-04
Loss = 1.9635e-01, PNorm = 54.6077, GNorm = 1.5474, lr_0 = 9.1032e-04
Loss = 2.1279e-01, PNorm = 54.6333, GNorm = 1.6315, lr_0 = 9.0970e-04
Loss = 1.9423e-01, PNorm = 54.6594, GNorm = 1.2419, lr_0 = 9.0908e-04
Loss = 1.8559e-01, PNorm = 54.6819, GNorm = 1.2257, lr_0 = 9.0846e-04
Loss = 1.9943e-01, PNorm = 54.7060, GNorm = 1.3556, lr_0 = 9.0783e-04
Loss = 2.0389e-01, PNorm = 54.7310, GNorm = 1.2098, lr_0 = 9.0721e-04
Loss = 2.3521e-01, PNorm = 54.7664, GNorm = 1.4610, lr_0 = 9.0659e-04
Loss = 2.1575e-01, PNorm = 54.8063, GNorm = 1.1875, lr_0 = 9.0597e-04
Loss = 2.0353e-01, PNorm = 54.8339, GNorm = 1.7689, lr_0 = 9.0535e-04
Loss = 1.6927e-01, PNorm = 54.8540, GNorm = 1.8777, lr_0 = 9.0473e-04
Loss = 1.9045e-01, PNorm = 54.8816, GNorm = 0.7403, lr_0 = 9.0411e-04
Loss = 2.2346e-01, PNorm = 54.9073, GNorm = 2.2109, lr_0 = 9.0349e-04
Loss = 2.1162e-01, PNorm = 54.9301, GNorm = 0.5718, lr_0 = 9.0287e-04
Loss = 2.0135e-01, PNorm = 54.9537, GNorm = 1.9131, lr_0 = 9.0225e-04
Loss = 2.0304e-01, PNorm = 54.9852, GNorm = 2.5597, lr_0 = 9.0163e-04
Loss = 2.4523e-01, PNorm = 55.0010, GNorm = 0.6927, lr_0 = 9.0102e-04
Loss = 2.2115e-01, PNorm = 55.0316, GNorm = 0.9181, lr_0 = 9.0040e-04
Loss = 2.1610e-01, PNorm = 55.0501, GNorm = 0.8124, lr_0 = 8.9978e-04
Loss = 1.7916e-01, PNorm = 55.0751, GNorm = 1.8048, lr_0 = 8.9916e-04
Loss = 2.1598e-01, PNorm = 55.0915, GNorm = 1.9901, lr_0 = 8.9855e-04
Loss = 1.9693e-01, PNorm = 55.1111, GNorm = 0.7721, lr_0 = 8.9793e-04
Loss = 1.9677e-01, PNorm = 55.1327, GNorm = 0.8190, lr_0 = 8.9732e-04
Loss = 2.0079e-01, PNorm = 55.1508, GNorm = 1.7782, lr_0 = 8.9670e-04
Loss = 2.0315e-01, PNorm = 55.1875, GNorm = 1.0230, lr_0 = 8.9609e-04
Loss = 1.8202e-01, PNorm = 55.2179, GNorm = 0.8573, lr_0 = 8.9548e-04
Loss = 1.9360e-01, PNorm = 55.2406, GNorm = 0.8686, lr_0 = 8.9486e-04
Loss = 1.9886e-01, PNorm = 55.2697, GNorm = 0.6815, lr_0 = 8.9425e-04
Loss = 1.8943e-01, PNorm = 55.2956, GNorm = 0.8193, lr_0 = 8.9364e-04
Loss = 2.0943e-01, PNorm = 55.3227, GNorm = 1.8946, lr_0 = 8.9302e-04
Loss = 2.1643e-01, PNorm = 55.3525, GNorm = 1.1396, lr_0 = 8.9241e-04
Loss = 2.3054e-01, PNorm = 55.3777, GNorm = 0.6719, lr_0 = 8.9180e-04
Loss = 2.4651e-01, PNorm = 55.4162, GNorm = 0.6673, lr_0 = 8.9119e-04
Loss = 2.0978e-01, PNorm = 55.4504, GNorm = 1.1415, lr_0 = 8.9058e-04
Loss = 2.2318e-01, PNorm = 55.4710, GNorm = 0.8540, lr_0 = 8.8997e-04
Loss = 2.0965e-01, PNorm = 55.4963, GNorm = 1.5448, lr_0 = 8.8936e-04
Loss = 2.0180e-01, PNorm = 55.5305, GNorm = 1.8587, lr_0 = 8.8875e-04
Loss = 2.2968e-01, PNorm = 55.5625, GNorm = 2.1646, lr_0 = 8.8814e-04
Loss = 1.9797e-01, PNorm = 55.5901, GNorm = 2.3078, lr_0 = 8.8753e-04
Loss = 1.8801e-01, PNorm = 55.6111, GNorm = 0.8414, lr_0 = 8.8693e-04
Loss = 2.0990e-01, PNorm = 55.6279, GNorm = 2.3643, lr_0 = 8.8632e-04
Loss = 2.4136e-01, PNorm = 55.6542, GNorm = 1.7298, lr_0 = 8.8571e-04
Loss = 2.0971e-01, PNorm = 55.6869, GNorm = 1.9499, lr_0 = 8.8510e-04
Loss = 2.0469e-01, PNorm = 55.7153, GNorm = 1.0138, lr_0 = 8.8450e-04
Loss = 2.1428e-01, PNorm = 55.7425, GNorm = 1.3154, lr_0 = 8.8389e-04
Loss = 1.8059e-01, PNorm = 55.7657, GNorm = 0.6793, lr_0 = 8.8329e-04
Loss = 1.9025e-01, PNorm = 55.7884, GNorm = 1.1723, lr_0 = 8.8268e-04
Loss = 1.8238e-01, PNorm = 55.8100, GNorm = 0.7269, lr_0 = 8.8208e-04
Loss = 1.9286e-01, PNorm = 55.8267, GNorm = 1.9778, lr_0 = 8.8147e-04
Loss = 1.9338e-01, PNorm = 55.8477, GNorm = 0.5164, lr_0 = 8.8087e-04
Loss = 1.8985e-01, PNorm = 55.8725, GNorm = 0.6813, lr_0 = 8.8026e-04
Loss = 2.0530e-01, PNorm = 55.8904, GNorm = 1.2688, lr_0 = 8.7966e-04
Loss = 2.2808e-01, PNorm = 55.9099, GNorm = 2.4191, lr_0 = 8.7906e-04
Loss = 2.0862e-01, PNorm = 55.9302, GNorm = 0.7928, lr_0 = 8.7846e-04
Loss = 1.9820e-01, PNorm = 55.9621, GNorm = 1.0443, lr_0 = 8.7785e-04
Loss = 1.9250e-01, PNorm = 55.9865, GNorm = 0.5982, lr_0 = 8.7725e-04
Loss = 2.2016e-01, PNorm = 56.0175, GNorm = 1.1920, lr_0 = 8.7665e-04
Loss = 2.1670e-01, PNorm = 56.0477, GNorm = 1.1514, lr_0 = 8.7605e-04
Loss = 1.9974e-01, PNorm = 56.0726, GNorm = 1.4367, lr_0 = 8.7545e-04
Loss = 1.8579e-01, PNorm = 56.0955, GNorm = 1.1983, lr_0 = 8.7485e-04
Loss = 1.9786e-01, PNorm = 56.1207, GNorm = 1.9578, lr_0 = 8.7425e-04
Loss = 1.8796e-01, PNorm = 56.1414, GNorm = 0.9844, lr_0 = 8.7365e-04
Loss = 2.1562e-01, PNorm = 56.1666, GNorm = 0.8317, lr_0 = 8.7306e-04
Loss = 2.0473e-01, PNorm = 56.1938, GNorm = 1.2464, lr_0 = 8.7246e-04
Loss = 2.1280e-01, PNorm = 56.2137, GNorm = 1.2392, lr_0 = 8.7186e-04
Loss = 1.9986e-01, PNorm = 56.2352, GNorm = 1.1307, lr_0 = 8.7126e-04
Loss = 1.9646e-01, PNorm = 56.2616, GNorm = 0.8360, lr_0 = 8.7067e-04
Loss = 2.0033e-01, PNorm = 56.2834, GNorm = 2.1212, lr_0 = 8.7007e-04
Loss = 2.0371e-01, PNorm = 56.3058, GNorm = 0.9752, lr_0 = 8.6947e-04
Loss = 1.8229e-01, PNorm = 56.3351, GNorm = 0.6194, lr_0 = 8.6888e-04
Loss = 1.9908e-01, PNorm = 56.3598, GNorm = 0.6922, lr_0 = 8.6828e-04
Loss = 1.7998e-01, PNorm = 56.3882, GNorm = 0.7360, lr_0 = 8.6769e-04
Loss = 2.1206e-01, PNorm = 56.4093, GNorm = 1.2649, lr_0 = 8.6709e-04
Loss = 1.8002e-01, PNorm = 56.4311, GNorm = 2.0289, lr_0 = 8.6650e-04
Loss = 2.2729e-01, PNorm = 56.4609, GNorm = 1.1099, lr_0 = 8.6590e-04
Loss = 2.0106e-01, PNorm = 56.4851, GNorm = 0.5314, lr_0 = 8.6531e-04
Loss = 1.8520e-01, PNorm = 56.5071, GNorm = 1.0472, lr_0 = 8.6472e-04
Loss = 2.3116e-01, PNorm = 56.5361, GNorm = 4.0167, lr_0 = 8.6413e-04
Loss = 2.1047e-01, PNorm = 56.5529, GNorm = 1.0328, lr_0 = 8.6353e-04
Loss = 1.7856e-01, PNorm = 56.5767, GNorm = 0.6642, lr_0 = 8.6294e-04
Loss = 1.9995e-01, PNorm = 56.5939, GNorm = 1.0881, lr_0 = 8.6235e-04
Loss = 2.1488e-01, PNorm = 56.6165, GNorm = 0.8803, lr_0 = 8.6176e-04
Loss = 2.1389e-01, PNorm = 56.6517, GNorm = 1.9115, lr_0 = 8.6117e-04
Loss = 1.9096e-01, PNorm = 56.6702, GNorm = 0.9305, lr_0 = 8.6058e-04
Loss = 1.9743e-01, PNorm = 56.6964, GNorm = 2.1408, lr_0 = 8.5999e-04
Loss = 1.8192e-01, PNorm = 56.7203, GNorm = 0.8717, lr_0 = 8.5940e-04
Loss = 1.7174e-01, PNorm = 56.7392, GNorm = 0.9147, lr_0 = 8.5881e-04
Loss = 2.0667e-01, PNorm = 56.7604, GNorm = 3.4399, lr_0 = 8.5823e-04
Loss = 2.0782e-01, PNorm = 56.7863, GNorm = 1.2534, lr_0 = 8.5764e-04
Loss = 2.0897e-01, PNorm = 56.8122, GNorm = 1.9710, lr_0 = 8.5705e-04
Loss = 2.3422e-01, PNorm = 56.8445, GNorm = 0.5490, lr_0 = 8.5646e-04
Loss = 2.4310e-01, PNorm = 56.8761, GNorm = 2.4551, lr_0 = 8.5588e-04
Loss = 2.2417e-01, PNorm = 56.9036, GNorm = 0.7971, lr_0 = 8.5529e-04
Loss = 2.1565e-01, PNorm = 56.9264, GNorm = 1.3471, lr_0 = 8.5470e-04
Loss = 2.0199e-01, PNorm = 56.9536, GNorm = 0.6761, lr_0 = 8.5412e-04
Loss = 2.2154e-01, PNorm = 56.9759, GNorm = 1.1861, lr_0 = 8.5353e-04
Loss = 2.3354e-01, PNorm = 57.0162, GNorm = 1.2695, lr_0 = 8.5295e-04
Loss = 1.8338e-01, PNorm = 57.0386, GNorm = 0.9288, lr_0 = 8.5236e-04
Loss = 1.9395e-01, PNorm = 57.0612, GNorm = 1.2992, lr_0 = 8.5178e-04
Loss = 2.0841e-01, PNorm = 57.0856, GNorm = 1.8564, lr_0 = 8.5120e-04
Loss = 2.3259e-01, PNorm = 57.1189, GNorm = 2.0422, lr_0 = 8.5061e-04
Loss = 2.0694e-01, PNorm = 57.1497, GNorm = 0.7835, lr_0 = 8.5003e-04
Loss = 2.3097e-01, PNorm = 57.1786, GNorm = 1.5667, lr_0 = 8.4945e-04
Loss = 1.6498e-01, PNorm = 57.2050, GNorm = 0.4625, lr_0 = 8.4887e-04
Loss = 1.9806e-01, PNorm = 57.2249, GNorm = 0.9070, lr_0 = 8.4828e-04
Validation mae = 0.260644
Epoch 4
Loss = 1.8149e-01, PNorm = 57.2462, GNorm = 0.9707, lr_0 = 8.4770e-04
Loss = 1.7876e-01, PNorm = 57.2671, GNorm = 0.7248, lr_0 = 8.4712e-04
Loss = 1.6742e-01, PNorm = 57.2851, GNorm = 1.0379, lr_0 = 8.4654e-04
Loss = 1.7056e-01, PNorm = 57.3163, GNorm = 1.2679, lr_0 = 8.4596e-04
Loss = 1.7848e-01, PNorm = 57.3429, GNorm = 1.1437, lr_0 = 8.4538e-04
Loss = 2.0870e-01, PNorm = 57.3618, GNorm = 1.7044, lr_0 = 8.4480e-04
Loss = 1.7619e-01, PNorm = 57.3883, GNorm = 1.8814, lr_0 = 8.4423e-04
Loss = 1.9098e-01, PNorm = 57.4073, GNorm = 0.8531, lr_0 = 8.4365e-04
Loss = 1.7976e-01, PNorm = 57.4320, GNorm = 1.0854, lr_0 = 8.4307e-04
Loss = 1.8743e-01, PNorm = 57.4616, GNorm = 1.1267, lr_0 = 8.4249e-04
Loss = 1.8776e-01, PNorm = 57.4873, GNorm = 0.5600, lr_0 = 8.4191e-04
Loss = 1.7522e-01, PNorm = 57.5103, GNorm = 0.6861, lr_0 = 8.4134e-04
Loss = 1.7862e-01, PNorm = 57.5324, GNorm = 0.9438, lr_0 = 8.4076e-04
Loss = 1.7460e-01, PNorm = 57.5615, GNorm = 0.8959, lr_0 = 8.4019e-04
Loss = 1.8513e-01, PNorm = 57.5878, GNorm = 1.1967, lr_0 = 8.3961e-04
Loss = 1.9446e-01, PNorm = 57.6166, GNorm = 0.9318, lr_0 = 8.3903e-04
Loss = 2.2385e-01, PNorm = 57.6372, GNorm = 1.0911, lr_0 = 8.3846e-04
Loss = 2.0577e-01, PNorm = 57.6647, GNorm = 0.7008, lr_0 = 8.3789e-04
Loss = 1.8543e-01, PNorm = 57.6843, GNorm = 0.9268, lr_0 = 8.3731e-04
Loss = 1.7682e-01, PNorm = 57.7047, GNorm = 0.5854, lr_0 = 8.3674e-04
Loss = 1.5608e-01, PNorm = 57.7215, GNorm = 0.5926, lr_0 = 8.3616e-04
Loss = 2.0713e-01, PNorm = 57.7455, GNorm = 0.9083, lr_0 = 8.3559e-04
Loss = 1.8876e-01, PNorm = 57.7647, GNorm = 0.7207, lr_0 = 8.3502e-04
Loss = 1.9815e-01, PNorm = 57.7929, GNorm = 2.3052, lr_0 = 8.3445e-04
Loss = 1.8540e-01, PNorm = 57.8162, GNorm = 0.8349, lr_0 = 8.3388e-04
Loss = 1.8619e-01, PNorm = 57.8402, GNorm = 1.1126, lr_0 = 8.3330e-04
Loss = 1.9483e-01, PNorm = 57.8673, GNorm = 0.9424, lr_0 = 8.3273e-04
Loss = 1.8687e-01, PNorm = 57.8875, GNorm = 0.7616, lr_0 = 8.3216e-04
Loss = 1.8652e-01, PNorm = 57.9062, GNorm = 0.8295, lr_0 = 8.3159e-04
Loss = 2.2178e-01, PNorm = 57.9339, GNorm = 1.0500, lr_0 = 8.3102e-04
Loss = 1.7644e-01, PNorm = 57.9608, GNorm = 1.1953, lr_0 = 8.3045e-04
Loss = 1.7824e-01, PNorm = 57.9843, GNorm = 0.7467, lr_0 = 8.2988e-04
Loss = 1.7385e-01, PNorm = 58.0122, GNorm = 0.9920, lr_0 = 8.2932e-04
Loss = 1.6657e-01, PNorm = 58.0336, GNorm = 1.2222, lr_0 = 8.2875e-04
Loss = 1.9125e-01, PNorm = 58.0583, GNorm = 0.8368, lr_0 = 8.2818e-04
Loss = 2.0322e-01, PNorm = 58.0887, GNorm = 0.8193, lr_0 = 8.2761e-04
Loss = 1.7879e-01, PNorm = 58.1197, GNorm = 0.8648, lr_0 = 8.2705e-04
Loss = 1.9584e-01, PNorm = 58.1463, GNorm = 1.1164, lr_0 = 8.2648e-04
Loss = 1.8025e-01, PNorm = 58.1712, GNorm = 0.7028, lr_0 = 8.2591e-04
Loss = 1.9098e-01, PNorm = 58.1926, GNorm = 2.1788, lr_0 = 8.2535e-04
Loss = 1.9574e-01, PNorm = 58.2321, GNorm = 0.6787, lr_0 = 8.2478e-04
Loss = 1.6913e-01, PNorm = 58.2594, GNorm = 0.8956, lr_0 = 8.2422e-04
Loss = 1.8010e-01, PNorm = 58.2890, GNorm = 1.8051, lr_0 = 8.2365e-04
Loss = 1.9729e-01, PNorm = 58.3192, GNorm = 0.8908, lr_0 = 8.2309e-04
Loss = 2.1177e-01, PNorm = 58.3502, GNorm = 0.8056, lr_0 = 8.2252e-04
Loss = 1.7905e-01, PNorm = 58.3762, GNorm = 1.1617, lr_0 = 8.2196e-04
Loss = 1.6487e-01, PNorm = 58.3904, GNorm = 0.9500, lr_0 = 8.2140e-04
Loss = 1.8262e-01, PNorm = 58.4031, GNorm = 1.4146, lr_0 = 8.2084e-04
Loss = 1.6694e-01, PNorm = 58.4244, GNorm = 1.6710, lr_0 = 8.2027e-04
Loss = 1.8756e-01, PNorm = 58.4542, GNorm = 1.0363, lr_0 = 8.1971e-04
Loss = 1.8039e-01, PNorm = 58.4868, GNorm = 0.9648, lr_0 = 8.1915e-04
Loss = 1.8946e-01, PNorm = 58.5138, GNorm = 1.6761, lr_0 = 8.1859e-04
Loss = 1.7627e-01, PNorm = 58.5420, GNorm = 1.2215, lr_0 = 8.1803e-04
Loss = 1.8839e-01, PNorm = 58.5670, GNorm = 2.1216, lr_0 = 8.1747e-04
Loss = 2.1139e-01, PNorm = 58.5870, GNorm = 1.4748, lr_0 = 8.1691e-04
Loss = 2.1974e-01, PNorm = 58.6206, GNorm = 2.5375, lr_0 = 8.1635e-04
Loss = 1.9857e-01, PNorm = 58.6479, GNorm = 1.5962, lr_0 = 8.1579e-04
Loss = 1.9394e-01, PNorm = 58.6830, GNorm = 2.5886, lr_0 = 8.1523e-04
Loss = 1.6767e-01, PNorm = 58.7015, GNorm = 0.8517, lr_0 = 8.1467e-04
Loss = 1.8081e-01, PNorm = 58.7252, GNorm = 0.7711, lr_0 = 8.1411e-04
Loss = 1.7163e-01, PNorm = 58.7519, GNorm = 1.4140, lr_0 = 8.1355e-04
Loss = 2.0429e-01, PNorm = 58.7809, GNorm = 1.2026, lr_0 = 8.1300e-04
Loss = 1.8122e-01, PNorm = 58.8014, GNorm = 1.3120, lr_0 = 8.1244e-04
Loss = 1.9043e-01, PNorm = 58.8297, GNorm = 1.7046, lr_0 = 8.1188e-04
Loss = 1.7701e-01, PNorm = 58.8485, GNorm = 0.6884, lr_0 = 8.1133e-04
Loss = 1.8399e-01, PNorm = 58.8770, GNorm = 0.6389, lr_0 = 8.1077e-04
Loss = 1.8180e-01, PNorm = 58.9047, GNorm = 1.6462, lr_0 = 8.1022e-04
Loss = 1.7345e-01, PNorm = 58.9301, GNorm = 1.2499, lr_0 = 8.0966e-04
Loss = 1.9003e-01, PNorm = 58.9525, GNorm = 0.7854, lr_0 = 8.0911e-04
Loss = 1.9383e-01, PNorm = 58.9832, GNorm = 1.0973, lr_0 = 8.0855e-04
Loss = 1.9350e-01, PNorm = 59.0064, GNorm = 0.9149, lr_0 = 8.0800e-04
Loss = 1.7559e-01, PNorm = 59.0299, GNorm = 1.9036, lr_0 = 8.0745e-04
Loss = 2.1648e-01, PNorm = 59.0541, GNorm = 0.7045, lr_0 = 8.0689e-04
Loss = 1.8135e-01, PNorm = 59.0946, GNorm = 1.1432, lr_0 = 8.0634e-04
Loss = 1.9338e-01, PNorm = 59.1191, GNorm = 0.6553, lr_0 = 8.0579e-04
Loss = 1.8549e-01, PNorm = 59.1389, GNorm = 0.8421, lr_0 = 8.0523e-04
Loss = 1.6976e-01, PNorm = 59.1627, GNorm = 1.0398, lr_0 = 8.0468e-04
Loss = 1.8893e-01, PNorm = 59.1884, GNorm = 0.7022, lr_0 = 8.0413e-04
Loss = 1.8100e-01, PNorm = 59.2079, GNorm = 0.6969, lr_0 = 8.0358e-04
Loss = 2.0780e-01, PNorm = 59.2315, GNorm = 1.6120, lr_0 = 8.0303e-04
Loss = 1.7879e-01, PNorm = 59.2625, GNorm = 1.2719, lr_0 = 8.0248e-04
Loss = 1.9401e-01, PNorm = 59.2894, GNorm = 0.8030, lr_0 = 8.0193e-04
Loss = 1.9966e-01, PNorm = 59.3208, GNorm = 1.2806, lr_0 = 8.0138e-04
Loss = 2.3064e-01, PNorm = 59.3483, GNorm = 0.7013, lr_0 = 8.0083e-04
Loss = 1.8013e-01, PNorm = 59.3736, GNorm = 1.0456, lr_0 = 8.0028e-04
Loss = 1.8885e-01, PNorm = 59.3938, GNorm = 1.4556, lr_0 = 7.9974e-04
Loss = 2.0218e-01, PNorm = 59.4173, GNorm = 1.4758, lr_0 = 7.9919e-04
Loss = 1.7269e-01, PNorm = 59.4448, GNorm = 1.5245, lr_0 = 7.9864e-04
Loss = 1.7034e-01, PNorm = 59.4664, GNorm = 0.7693, lr_0 = 7.9809e-04
Loss = 1.7410e-01, PNorm = 59.4884, GNorm = 1.5014, lr_0 = 7.9755e-04
Loss = 1.7593e-01, PNorm = 59.5077, GNorm = 1.4908, lr_0 = 7.9700e-04
Loss = 1.9741e-01, PNorm = 59.5332, GNorm = 0.5664, lr_0 = 7.9645e-04
Loss = 1.7630e-01, PNorm = 59.5499, GNorm = 0.7620, lr_0 = 7.9591e-04
Loss = 1.6777e-01, PNorm = 59.5703, GNorm = 0.8236, lr_0 = 7.9536e-04
Loss = 1.9390e-01, PNorm = 59.5911, GNorm = 1.3197, lr_0 = 7.9482e-04
Loss = 1.9199e-01, PNorm = 59.6174, GNorm = 1.3031, lr_0 = 7.9427e-04
Loss = 2.2421e-01, PNorm = 59.6487, GNorm = 0.8649, lr_0 = 7.9373e-04
Loss = 2.0372e-01, PNorm = 59.6824, GNorm = 2.0023, lr_0 = 7.9319e-04
Loss = 1.8619e-01, PNorm = 59.7052, GNorm = 0.8078, lr_0 = 7.9264e-04
Loss = 2.0735e-01, PNorm = 59.7290, GNorm = 0.8653, lr_0 = 7.9210e-04
Loss = 2.0367e-01, PNorm = 59.7430, GNorm = 0.7906, lr_0 = 7.9156e-04
Loss = 1.8407e-01, PNorm = 59.7678, GNorm = 1.6827, lr_0 = 7.9101e-04
Loss = 2.1416e-01, PNorm = 59.7882, GNorm = 0.8740, lr_0 = 7.9047e-04
Loss = 1.7593e-01, PNorm = 59.8166, GNorm = 1.2260, lr_0 = 7.8993e-04
Loss = 1.9564e-01, PNorm = 59.8374, GNorm = 0.7841, lr_0 = 7.8939e-04
Loss = 1.8579e-01, PNorm = 59.8550, GNorm = 1.0375, lr_0 = 7.8885e-04
Loss = 1.8836e-01, PNorm = 59.8783, GNorm = 1.0357, lr_0 = 7.8831e-04
Loss = 1.7645e-01, PNorm = 59.9101, GNorm = 0.6362, lr_0 = 7.8777e-04
Loss = 1.8021e-01, PNorm = 59.9299, GNorm = 0.9278, lr_0 = 7.8723e-04
Loss = 1.7102e-01, PNorm = 59.9522, GNorm = 0.8426, lr_0 = 7.8669e-04
Loss = 1.8203e-01, PNorm = 59.9787, GNorm = 1.3239, lr_0 = 7.8615e-04
Loss = 1.7503e-01, PNorm = 59.9929, GNorm = 1.5698, lr_0 = 7.8561e-04
Loss = 1.9335e-01, PNorm = 60.0173, GNorm = 0.6839, lr_0 = 7.8507e-04
Loss = 1.8577e-01, PNorm = 60.0402, GNorm = 0.6647, lr_0 = 7.8454e-04
Loss = 2.0353e-01, PNorm = 60.0647, GNorm = 1.7661, lr_0 = 7.8400e-04
Loss = 2.2081e-01, PNorm = 60.0890, GNorm = 0.7893, lr_0 = 7.8346e-04
Loss = 1.8485e-01, PNorm = 60.1233, GNorm = 0.9254, lr_0 = 7.8293e-04
Loss = 1.8439e-01, PNorm = 60.1449, GNorm = 0.8358, lr_0 = 7.8239e-04
Loss = 1.6399e-01, PNorm = 60.1678, GNorm = 0.4336, lr_0 = 7.8185e-04
Loss = 1.8202e-01, PNorm = 60.1902, GNorm = 0.7581, lr_0 = 7.8132e-04
Validation mae = 0.254879
Epoch 5
Loss = 1.6088e-01, PNorm = 60.2075, GNorm = 1.1713, lr_0 = 7.8078e-04
Loss = 1.7307e-01, PNorm = 60.2336, GNorm = 1.2922, lr_0 = 7.8025e-04
Loss = 2.4607e-01, PNorm = 60.2529, GNorm = 0.6856, lr_0 = 7.7971e-04
Loss = 1.8247e-01, PNorm = 60.2813, GNorm = 0.9871, lr_0 = 7.7918e-04
Loss = 1.6717e-01, PNorm = 60.3005, GNorm = 0.8639, lr_0 = 7.7864e-04
Loss = 1.7823e-01, PNorm = 60.3227, GNorm = 0.5967, lr_0 = 7.7811e-04
Loss = 1.6378e-01, PNorm = 60.3548, GNorm = 1.2982, lr_0 = 7.7758e-04
Loss = 1.6661e-01, PNorm = 60.3752, GNorm = 0.5899, lr_0 = 7.7705e-04
Loss = 1.6418e-01, PNorm = 60.3977, GNorm = 0.5953, lr_0 = 7.7651e-04
Loss = 2.1654e-01, PNorm = 60.4302, GNorm = 0.7724, lr_0 = 7.7598e-04
Loss = 1.8859e-01, PNorm = 60.4669, GNorm = 1.6393, lr_0 = 7.7545e-04
Loss = 1.7700e-01, PNorm = 60.4906, GNorm = 0.9677, lr_0 = 7.7492e-04
Loss = 1.8988e-01, PNorm = 60.5158, GNorm = 0.8811, lr_0 = 7.7439e-04
Loss = 1.9847e-01, PNorm = 60.5459, GNorm = 0.8181, lr_0 = 7.7386e-04
Loss = 1.7242e-01, PNorm = 60.5807, GNorm = 0.8698, lr_0 = 7.7333e-04
Loss = 1.5081e-01, PNorm = 60.6036, GNorm = 1.0166, lr_0 = 7.7280e-04
Loss = 1.8277e-01, PNorm = 60.6231, GNorm = 0.6417, lr_0 = 7.7227e-04
Loss = 1.5871e-01, PNorm = 60.6455, GNorm = 0.6029, lr_0 = 7.7174e-04
Loss = 1.9531e-01, PNorm = 60.6640, GNorm = 0.8333, lr_0 = 7.7121e-04
Loss = 1.7061e-01, PNorm = 60.6847, GNorm = 1.1870, lr_0 = 7.7068e-04
Loss = 1.9072e-01, PNorm = 60.7101, GNorm = 1.2507, lr_0 = 7.7015e-04
Loss = 1.6053e-01, PNorm = 60.7360, GNorm = 1.0018, lr_0 = 7.6963e-04
Loss = 1.7277e-01, PNorm = 60.7591, GNorm = 1.3083, lr_0 = 7.6910e-04
Loss = 1.6939e-01, PNorm = 60.7897, GNorm = 1.0678, lr_0 = 7.6857e-04
Loss = 1.9338e-01, PNorm = 60.8161, GNorm = 1.3930, lr_0 = 7.6805e-04
Loss = 1.8469e-01, PNorm = 60.8433, GNorm = 0.6796, lr_0 = 7.6752e-04
Loss = 1.7224e-01, PNorm = 60.8739, GNorm = 0.7635, lr_0 = 7.6699e-04
Loss = 1.5911e-01, PNorm = 60.9002, GNorm = 0.7578, lr_0 = 7.6647e-04
Loss = 1.7275e-01, PNorm = 60.9268, GNorm = 0.9137, lr_0 = 7.6594e-04
Loss = 1.7483e-01, PNorm = 60.9523, GNorm = 0.6632, lr_0 = 7.6542e-04
Loss = 1.7252e-01, PNorm = 60.9764, GNorm = 0.7690, lr_0 = 7.6489e-04
Loss = 1.8915e-01, PNorm = 61.0053, GNorm = 1.0322, lr_0 = 7.6437e-04
Loss = 1.5945e-01, PNorm = 61.0404, GNorm = 0.9666, lr_0 = 7.6385e-04
Loss = 1.6972e-01, PNorm = 61.0721, GNorm = 1.0726, lr_0 = 7.6332e-04
Loss = 1.7945e-01, PNorm = 61.0952, GNorm = 1.5023, lr_0 = 7.6280e-04
Loss = 1.6357e-01, PNorm = 61.1126, GNorm = 0.6528, lr_0 = 7.6228e-04
Loss = 1.7054e-01, PNorm = 61.1331, GNorm = 1.9362, lr_0 = 7.6176e-04
Loss = 2.0332e-01, PNorm = 61.1575, GNorm = 1.2116, lr_0 = 7.6123e-04
Loss = 1.7325e-01, PNorm = 61.1910, GNorm = 1.8140, lr_0 = 7.6071e-04
Loss = 1.9473e-01, PNorm = 61.2114, GNorm = 0.8212, lr_0 = 7.6019e-04
Loss = 1.7853e-01, PNorm = 61.2339, GNorm = 1.8593, lr_0 = 7.5967e-04
Loss = 1.8451e-01, PNorm = 61.2589, GNorm = 0.9129, lr_0 = 7.5915e-04
Loss = 1.9670e-01, PNorm = 61.2972, GNorm = 1.6299, lr_0 = 7.5863e-04
Loss = 1.6587e-01, PNorm = 61.3215, GNorm = 0.9441, lr_0 = 7.5811e-04
Loss = 1.6417e-01, PNorm = 61.3490, GNorm = 1.8096, lr_0 = 7.5759e-04
Loss = 1.6644e-01, PNorm = 61.3713, GNorm = 0.8876, lr_0 = 7.5707e-04
Loss = 1.7417e-01, PNorm = 61.3904, GNorm = 0.6623, lr_0 = 7.5655e-04
Loss = 1.5756e-01, PNorm = 61.4084, GNorm = 0.7609, lr_0 = 7.5603e-04
Loss = 1.8590e-01, PNorm = 61.4358, GNorm = 1.0974, lr_0 = 7.5552e-04
Loss = 1.6442e-01, PNorm = 61.4592, GNorm = 1.0865, lr_0 = 7.5500e-04
Loss = 1.7273e-01, PNorm = 61.4855, GNorm = 0.5788, lr_0 = 7.5448e-04
Loss = 1.7979e-01, PNorm = 61.5126, GNorm = 1.4097, lr_0 = 7.5397e-04
Loss = 1.7350e-01, PNorm = 61.5358, GNorm = 0.8550, lr_0 = 7.5345e-04
Loss = 1.6553e-01, PNorm = 61.5522, GNorm = 0.9666, lr_0 = 7.5293e-04
Loss = 2.0299e-01, PNorm = 61.5762, GNorm = 0.8820, lr_0 = 7.5242e-04
Loss = 1.7927e-01, PNorm = 61.6067, GNorm = 0.9187, lr_0 = 7.5190e-04
Loss = 1.6368e-01, PNorm = 61.6350, GNorm = 1.2218, lr_0 = 7.5139e-04
Loss = 1.9012e-01, PNorm = 61.6632, GNorm = 0.7075, lr_0 = 7.5087e-04
Loss = 1.6912e-01, PNorm = 61.6875, GNorm = 0.6093, lr_0 = 7.5036e-04
Loss = 1.7376e-01, PNorm = 61.7109, GNorm = 0.5358, lr_0 = 7.4984e-04
Loss = 1.8501e-01, PNorm = 61.7335, GNorm = 1.2321, lr_0 = 7.4933e-04
Loss = 1.6279e-01, PNorm = 61.7557, GNorm = 1.0763, lr_0 = 7.4882e-04
Loss = 1.5479e-01, PNorm = 61.7766, GNorm = 0.6050, lr_0 = 7.4830e-04
Loss = 1.5622e-01, PNorm = 61.7858, GNorm = 0.6260, lr_0 = 7.4779e-04
Loss = 1.7461e-01, PNorm = 61.7994, GNorm = 0.9277, lr_0 = 7.4728e-04
Loss = 1.5531e-01, PNorm = 61.8255, GNorm = 0.8095, lr_0 = 7.4677e-04
Loss = 1.8181e-01, PNorm = 61.8463, GNorm = 0.6607, lr_0 = 7.4625e-04
Loss = 1.6321e-01, PNorm = 61.8700, GNorm = 1.0836, lr_0 = 7.4574e-04
Loss = 1.6775e-01, PNorm = 61.8943, GNorm = 1.6695, lr_0 = 7.4523e-04
Loss = 1.9955e-01, PNorm = 61.9230, GNorm = 0.8653, lr_0 = 7.4472e-04
Loss = 1.7964e-01, PNorm = 61.9538, GNorm = 1.4849, lr_0 = 7.4421e-04
Loss = 2.0992e-01, PNorm = 61.9836, GNorm = 1.5920, lr_0 = 7.4370e-04
Loss = 2.3283e-01, PNorm = 62.0152, GNorm = 1.3144, lr_0 = 7.4319e-04
Loss = 1.9126e-01, PNorm = 62.0451, GNorm = 1.2529, lr_0 = 7.4268e-04
Loss = 1.9565e-01, PNorm = 62.0694, GNorm = 0.9775, lr_0 = 7.4217e-04
Loss = 1.5902e-01, PNorm = 62.1016, GNorm = 0.9032, lr_0 = 7.4167e-04
Loss = 1.9222e-01, PNorm = 62.1304, GNorm = 0.6601, lr_0 = 7.4116e-04
Loss = 2.0677e-01, PNorm = 62.1660, GNorm = 1.2871, lr_0 = 7.4065e-04
Loss = 2.0278e-01, PNorm = 62.1944, GNorm = 1.4135, lr_0 = 7.4014e-04
Loss = 1.7737e-01, PNorm = 62.2226, GNorm = 0.5399, lr_0 = 7.3964e-04
Loss = 1.6559e-01, PNorm = 62.2429, GNorm = 0.6210, lr_0 = 7.3913e-04
Loss = 1.7711e-01, PNorm = 62.2638, GNorm = 0.8446, lr_0 = 7.3862e-04
Loss = 1.7339e-01, PNorm = 62.2869, GNorm = 1.0965, lr_0 = 7.3812e-04
Loss = 1.7985e-01, PNorm = 62.3094, GNorm = 0.8460, lr_0 = 7.3761e-04
Loss = 1.6191e-01, PNorm = 62.3401, GNorm = 1.1369, lr_0 = 7.3711e-04
Loss = 1.6555e-01, PNorm = 62.3641, GNorm = 1.0318, lr_0 = 7.3660e-04
Loss = 1.6525e-01, PNorm = 62.3815, GNorm = 0.6526, lr_0 = 7.3610e-04
Loss = 1.6765e-01, PNorm = 62.3995, GNorm = 0.5932, lr_0 = 7.3559e-04
Loss = 1.7903e-01, PNorm = 62.4230, GNorm = 1.1630, lr_0 = 7.3509e-04
Loss = 1.9777e-01, PNorm = 62.4531, GNorm = 0.9111, lr_0 = 7.3458e-04
Loss = 1.7060e-01, PNorm = 62.4808, GNorm = 1.8754, lr_0 = 7.3408e-04
Loss = 1.9109e-01, PNorm = 62.5075, GNorm = 1.6350, lr_0 = 7.3358e-04
Loss = 1.6114e-01, PNorm = 62.5299, GNorm = 1.4919, lr_0 = 7.3308e-04
Loss = 1.6048e-01, PNorm = 62.5506, GNorm = 0.7172, lr_0 = 7.3257e-04
Loss = 1.7948e-01, PNorm = 62.5723, GNorm = 0.6575, lr_0 = 7.3207e-04
Loss = 1.8033e-01, PNorm = 62.5949, GNorm = 0.9633, lr_0 = 7.3157e-04
Loss = 1.7235e-01, PNorm = 62.6064, GNorm = 0.7037, lr_0 = 7.3107e-04
Loss = 1.6502e-01, PNorm = 62.6269, GNorm = 0.5437, lr_0 = 7.3057e-04
Loss = 2.0108e-01, PNorm = 62.6439, GNorm = 0.7527, lr_0 = 7.3007e-04
Loss = 1.8328e-01, PNorm = 62.6573, GNorm = 0.7282, lr_0 = 7.2957e-04
Loss = 1.7376e-01, PNorm = 62.6713, GNorm = 1.1082, lr_0 = 7.2907e-04
Loss = 1.6967e-01, PNorm = 62.7053, GNorm = 0.7197, lr_0 = 7.2857e-04
Loss = 1.8564e-01, PNorm = 62.7358, GNorm = 1.6753, lr_0 = 7.2807e-04
Loss = 1.8155e-01, PNorm = 62.7563, GNorm = 1.6112, lr_0 = 7.2757e-04
Loss = 1.6109e-01, PNorm = 62.7748, GNorm = 0.5686, lr_0 = 7.2707e-04
Loss = 1.7009e-01, PNorm = 62.7938, GNorm = 2.0494, lr_0 = 7.2657e-04
Loss = 1.8271e-01, PNorm = 62.8111, GNorm = 1.3749, lr_0 = 7.2608e-04
Loss = 1.8653e-01, PNorm = 62.8338, GNorm = 0.7823, lr_0 = 7.2558e-04
Loss = 1.5650e-01, PNorm = 62.8539, GNorm = 0.9506, lr_0 = 7.2508e-04
Loss = 1.8020e-01, PNorm = 62.8699, GNorm = 0.7042, lr_0 = 7.2458e-04
Loss = 1.6049e-01, PNorm = 62.8864, GNorm = 0.8662, lr_0 = 7.2409e-04
Loss = 1.6892e-01, PNorm = 62.9023, GNorm = 1.0049, lr_0 = 7.2359e-04
Loss = 1.5967e-01, PNorm = 62.9278, GNorm = 0.6417, lr_0 = 7.2310e-04
Loss = 1.7779e-01, PNorm = 62.9503, GNorm = 1.0579, lr_0 = 7.2260e-04
Loss = 2.0270e-01, PNorm = 62.9757, GNorm = 1.1646, lr_0 = 7.2211e-04
Loss = 1.8064e-01, PNorm = 63.0003, GNorm = 1.3473, lr_0 = 7.2161e-04
Loss = 1.7174e-01, PNorm = 63.0317, GNorm = 1.0004, lr_0 = 7.2112e-04
Loss = 1.6784e-01, PNorm = 63.0587, GNorm = 0.8277, lr_0 = 7.2062e-04
Loss = 1.8933e-01, PNorm = 63.0742, GNorm = 0.8918, lr_0 = 7.2013e-04
Loss = 1.6357e-01, PNorm = 63.0996, GNorm = 0.8629, lr_0 = 7.1964e-04
Validation mae = 0.259200
Epoch 6
Loss = 2.1119e-01, PNorm = 63.1148, GNorm = 0.5623, lr_0 = 7.1914e-04
Loss = 1.7176e-01, PNorm = 63.1365, GNorm = 1.7807, lr_0 = 7.1865e-04
Loss = 2.0543e-01, PNorm = 63.1646, GNorm = 0.6283, lr_0 = 7.1816e-04
Loss = 1.7627e-01, PNorm = 63.1906, GNorm = 0.6699, lr_0 = 7.1767e-04
Loss = 1.5527e-01, PNorm = 63.2181, GNorm = 0.6499, lr_0 = 7.1717e-04
Loss = 1.7559e-01, PNorm = 63.2312, GNorm = 0.5714, lr_0 = 7.1668e-04
Loss = 1.5525e-01, PNorm = 63.2473, GNorm = 0.8691, lr_0 = 7.1619e-04
Loss = 1.7273e-01, PNorm = 63.2737, GNorm = 0.5569, lr_0 = 7.1570e-04
Loss = 1.5615e-01, PNorm = 63.2984, GNorm = 0.9443, lr_0 = 7.1521e-04
Loss = 1.6483e-01, PNorm = 63.3187, GNorm = 0.6396, lr_0 = 7.1472e-04
Loss = 1.4500e-01, PNorm = 63.3441, GNorm = 1.0289, lr_0 = 7.1423e-04
Loss = 1.7168e-01, PNorm = 63.3646, GNorm = 1.2023, lr_0 = 7.1374e-04
Loss = 1.6549e-01, PNorm = 63.3851, GNorm = 1.2042, lr_0 = 7.1325e-04
Loss = 1.7683e-01, PNorm = 63.3985, GNorm = 0.9216, lr_0 = 7.1277e-04
Loss = 1.5243e-01, PNorm = 63.4169, GNorm = 0.9567, lr_0 = 7.1228e-04
Loss = 1.6429e-01, PNorm = 63.4349, GNorm = 1.6737, lr_0 = 7.1179e-04
Loss = 1.6913e-01, PNorm = 63.4651, GNorm = 0.6581, lr_0 = 7.1130e-04
Loss = 1.5878e-01, PNorm = 63.4929, GNorm = 0.5213, lr_0 = 7.1081e-04
Loss = 1.7337e-01, PNorm = 63.5157, GNorm = 0.8505, lr_0 = 7.1033e-04
Loss = 1.6763e-01, PNorm = 63.5415, GNorm = 0.8243, lr_0 = 7.0984e-04
Loss = 1.6854e-01, PNorm = 63.5617, GNorm = 0.8975, lr_0 = 7.0935e-04
Loss = 1.8134e-01, PNorm = 63.5864, GNorm = 0.6000, lr_0 = 7.0887e-04
Loss = 1.7022e-01, PNorm = 63.6066, GNorm = 0.5432, lr_0 = 7.0838e-04
Loss = 1.6588e-01, PNorm = 63.6297, GNorm = 1.4400, lr_0 = 7.0790e-04
Loss = 1.6626e-01, PNorm = 63.6577, GNorm = 1.1781, lr_0 = 7.0741e-04
Loss = 1.6908e-01, PNorm = 63.6819, GNorm = 0.6107, lr_0 = 7.0693e-04
Loss = 1.5056e-01, PNorm = 63.7068, GNorm = 0.6310, lr_0 = 7.0644e-04
Loss = 1.7853e-01, PNorm = 63.7320, GNorm = 0.6860, lr_0 = 7.0596e-04
Loss = 1.5339e-01, PNorm = 63.7594, GNorm = 0.5606, lr_0 = 7.0548e-04
Loss = 1.7696e-01, PNorm = 63.7784, GNorm = 0.6426, lr_0 = 7.0499e-04
Loss = 1.4147e-01, PNorm = 63.7959, GNorm = 0.7437, lr_0 = 7.0451e-04
Loss = 1.8277e-01, PNorm = 63.8146, GNorm = 1.3021, lr_0 = 7.0403e-04
Loss = 1.8946e-01, PNorm = 63.8366, GNorm = 1.0715, lr_0 = 7.0354e-04
Loss = 1.4859e-01, PNorm = 63.8595, GNorm = 0.9774, lr_0 = 7.0306e-04
Loss = 1.5881e-01, PNorm = 63.8786, GNorm = 0.9373, lr_0 = 7.0258e-04
Loss = 1.4717e-01, PNorm = 63.9010, GNorm = 1.5208, lr_0 = 7.0210e-04
Loss = 1.6621e-01, PNorm = 63.9265, GNorm = 0.7071, lr_0 = 7.0162e-04
Loss = 1.8380e-01, PNorm = 63.9477, GNorm = 1.6538, lr_0 = 7.0114e-04
Loss = 1.6876e-01, PNorm = 63.9721, GNorm = 1.3264, lr_0 = 7.0066e-04
Loss = 1.7642e-01, PNorm = 64.0008, GNorm = 1.5773, lr_0 = 7.0018e-04
Loss = 1.3727e-01, PNorm = 64.0234, GNorm = 0.8159, lr_0 = 6.9970e-04
Loss = 1.5125e-01, PNorm = 64.0418, GNorm = 0.6486, lr_0 = 6.9922e-04
Loss = 1.6734e-01, PNorm = 64.0635, GNorm = 1.8765, lr_0 = 6.9874e-04
Loss = 1.7440e-01, PNorm = 64.0921, GNorm = 0.7132, lr_0 = 6.9826e-04
Loss = 1.4772e-01, PNorm = 64.1136, GNorm = 0.5637, lr_0 = 6.9778e-04
Loss = 1.5245e-01, PNorm = 64.1370, GNorm = 0.9395, lr_0 = 6.9730e-04
Loss = 1.5137e-01, PNorm = 64.1520, GNorm = 0.6692, lr_0 = 6.9683e-04
Loss = 1.5568e-01, PNorm = 64.1646, GNorm = 0.7171, lr_0 = 6.9635e-04
Loss = 1.5833e-01, PNorm = 64.1825, GNorm = 0.8388, lr_0 = 6.9587e-04
Loss = 1.3281e-01, PNorm = 64.2031, GNorm = 0.5123, lr_0 = 6.9540e-04
Loss = 1.5473e-01, PNorm = 64.2175, GNorm = 0.6396, lr_0 = 6.9492e-04
Loss = 1.5492e-01, PNorm = 64.2368, GNorm = 0.9247, lr_0 = 6.9444e-04
Loss = 1.6551e-01, PNorm = 64.2610, GNorm = 1.0172, lr_0 = 6.9397e-04
Loss = 1.5826e-01, PNorm = 64.2753, GNorm = 0.9206, lr_0 = 6.9349e-04
Loss = 1.6009e-01, PNorm = 64.2877, GNorm = 0.5409, lr_0 = 6.9302e-04
Loss = 1.6089e-01, PNorm = 64.3076, GNorm = 0.7975, lr_0 = 6.9254e-04
Loss = 1.6592e-01, PNorm = 64.3191, GNorm = 0.8186, lr_0 = 6.9207e-04
Loss = 1.6748e-01, PNorm = 64.3394, GNorm = 1.7609, lr_0 = 6.9159e-04
Loss = 1.6032e-01, PNorm = 64.3554, GNorm = 0.8909, lr_0 = 6.9112e-04
Loss = 1.6521e-01, PNorm = 64.3739, GNorm = 0.8587, lr_0 = 6.9065e-04
Loss = 1.5490e-01, PNorm = 64.3889, GNorm = 0.6074, lr_0 = 6.9017e-04
Loss = 1.7556e-01, PNorm = 64.4096, GNorm = 0.9514, lr_0 = 6.8970e-04
Loss = 1.6682e-01, PNorm = 64.4344, GNorm = 0.7899, lr_0 = 6.8923e-04
Loss = 1.7067e-01, PNorm = 64.4572, GNorm = 1.1733, lr_0 = 6.8876e-04
Loss = 1.7620e-01, PNorm = 64.4873, GNorm = 0.9765, lr_0 = 6.8828e-04
Loss = 1.6958e-01, PNorm = 64.5145, GNorm = 0.7912, lr_0 = 6.8781e-04
Loss = 1.5658e-01, PNorm = 64.5378, GNorm = 0.5581, lr_0 = 6.8734e-04
Loss = 1.6174e-01, PNorm = 64.5558, GNorm = 0.6924, lr_0 = 6.8687e-04
Loss = 1.6865e-01, PNorm = 64.5752, GNorm = 0.9145, lr_0 = 6.8640e-04
Loss = 1.5280e-01, PNorm = 64.5956, GNorm = 0.6630, lr_0 = 6.8593e-04
Loss = 1.7074e-01, PNorm = 64.6231, GNorm = 0.7642, lr_0 = 6.8546e-04
Loss = 1.7384e-01, PNorm = 64.6414, GNorm = 0.6089, lr_0 = 6.8499e-04
Loss = 1.7309e-01, PNorm = 64.6665, GNorm = 0.6399, lr_0 = 6.8452e-04
Loss = 1.6957e-01, PNorm = 64.6860, GNorm = 0.7292, lr_0 = 6.8405e-04
Loss = 1.8786e-01, PNorm = 64.7079, GNorm = 0.8035, lr_0 = 6.8358e-04
Loss = 1.6362e-01, PNorm = 64.7333, GNorm = 0.8346, lr_0 = 6.8312e-04
Loss = 1.7661e-01, PNorm = 64.7573, GNorm = 0.9232, lr_0 = 6.8265e-04
Loss = 1.5129e-01, PNorm = 64.7769, GNorm = 1.2702, lr_0 = 6.8218e-04
Loss = 1.7353e-01, PNorm = 64.7934, GNorm = 0.5430, lr_0 = 6.8171e-04
Loss = 1.6830e-01, PNorm = 64.8213, GNorm = 1.1505, lr_0 = 6.8125e-04
Loss = 1.7917e-01, PNorm = 64.8437, GNorm = 0.9735, lr_0 = 6.8078e-04
Loss = 1.7821e-01, PNorm = 64.8676, GNorm = 0.8593, lr_0 = 6.8031e-04
Loss = 1.5637e-01, PNorm = 64.8931, GNorm = 0.6432, lr_0 = 6.7985e-04
Loss = 1.5586e-01, PNorm = 64.9202, GNorm = 0.8175, lr_0 = 6.7938e-04
Loss = 1.6052e-01, PNorm = 64.9450, GNorm = 0.8043, lr_0 = 6.7892e-04
Loss = 1.5052e-01, PNorm = 64.9645, GNorm = 0.7290, lr_0 = 6.7845e-04
Loss = 1.6867e-01, PNorm = 64.9831, GNorm = 0.6305, lr_0 = 6.7799e-04
Loss = 1.8198e-01, PNorm = 65.0029, GNorm = 0.8338, lr_0 = 6.7752e-04
Loss = 1.7467e-01, PNorm = 65.0274, GNorm = 1.3402, lr_0 = 6.7706e-04
Loss = 1.7355e-01, PNorm = 65.0471, GNorm = 1.0017, lr_0 = 6.7659e-04
Loss = 1.6300e-01, PNorm = 65.0642, GNorm = 0.6492, lr_0 = 6.7613e-04
Loss = 1.6714e-01, PNorm = 65.0827, GNorm = 0.8421, lr_0 = 6.7567e-04
Loss = 1.6097e-01, PNorm = 65.1055, GNorm = 0.7098, lr_0 = 6.7520e-04
Loss = 1.5137e-01, PNorm = 65.1210, GNorm = 0.5151, lr_0 = 6.7474e-04
Loss = 1.5407e-01, PNorm = 65.1370, GNorm = 0.9252, lr_0 = 6.7428e-04
Loss = 1.7140e-01, PNorm = 65.1601, GNorm = 1.4356, lr_0 = 6.7382e-04
Loss = 1.9795e-01, PNorm = 65.1853, GNorm = 1.1547, lr_0 = 6.7335e-04
Loss = 1.7966e-01, PNorm = 65.2052, GNorm = 0.7479, lr_0 = 6.7289e-04
Loss = 1.7088e-01, PNorm = 65.2299, GNorm = 0.5968, lr_0 = 6.7243e-04
Loss = 1.4704e-01, PNorm = 65.2549, GNorm = 0.6952, lr_0 = 6.7197e-04
Loss = 1.5828e-01, PNorm = 65.2815, GNorm = 1.3230, lr_0 = 6.7151e-04
Loss = 1.5498e-01, PNorm = 65.2970, GNorm = 0.5286, lr_0 = 6.7105e-04
Loss = 1.6623e-01, PNorm = 65.3163, GNorm = 1.1827, lr_0 = 6.7059e-04
Loss = 1.6020e-01, PNorm = 65.3365, GNorm = 0.5640, lr_0 = 6.7013e-04
Loss = 1.6709e-01, PNorm = 65.3678, GNorm = 0.8715, lr_0 = 6.6967e-04
Loss = 1.5368e-01, PNorm = 65.3933, GNorm = 0.7017, lr_0 = 6.6921e-04
Loss = 1.4880e-01, PNorm = 65.4112, GNorm = 0.5911, lr_0 = 6.6876e-04
Loss = 1.6532e-01, PNorm = 65.4176, GNorm = 0.8314, lr_0 = 6.6830e-04
Loss = 1.8518e-01, PNorm = 65.4337, GNorm = 1.3699, lr_0 = 6.6784e-04
Loss = 1.7946e-01, PNorm = 65.4558, GNorm = 0.8566, lr_0 = 6.6738e-04
Loss = 1.5110e-01, PNorm = 65.4766, GNorm = 0.7252, lr_0 = 6.6693e-04
Loss = 1.6338e-01, PNorm = 65.5081, GNorm = 1.4806, lr_0 = 6.6647e-04
Loss = 1.3505e-01, PNorm = 65.5314, GNorm = 0.5512, lr_0 = 6.6601e-04
Loss = 1.7225e-01, PNorm = 65.5540, GNorm = 1.2516, lr_0 = 6.6556e-04
Loss = 1.6849e-01, PNorm = 65.5656, GNorm = 0.6868, lr_0 = 6.6510e-04
Loss = 1.6984e-01, PNorm = 65.5914, GNorm = 0.7422, lr_0 = 6.6464e-04
Loss = 1.6509e-01, PNorm = 65.6170, GNorm = 0.6911, lr_0 = 6.6419e-04
Loss = 1.6435e-01, PNorm = 65.6443, GNorm = 1.2141, lr_0 = 6.6373e-04
Loss = 1.5582e-01, PNorm = 65.6704, GNorm = 1.4867, lr_0 = 6.6328e-04
Loss = 1.6346e-01, PNorm = 65.6843, GNorm = 0.8387, lr_0 = 6.6282e-04
Validation mae = 0.246935
Epoch 7
Loss = 1.5283e-01, PNorm = 65.7070, GNorm = 0.6880, lr_0 = 6.6237e-04
Loss = 1.3228e-01, PNorm = 65.7241, GNorm = 1.4590, lr_0 = 6.6192e-04
Loss = 1.7058e-01, PNorm = 65.7440, GNorm = 2.4515, lr_0 = 6.6146e-04
Loss = 1.6896e-01, PNorm = 65.7655, GNorm = 1.0618, lr_0 = 6.6101e-04
Loss = 1.8856e-01, PNorm = 65.7893, GNorm = 2.1492, lr_0 = 6.6056e-04
Loss = 1.3574e-01, PNorm = 65.8157, GNorm = 0.6238, lr_0 = 6.6011e-04
Loss = 1.7603e-01, PNorm = 65.8435, GNorm = 0.8100, lr_0 = 6.5965e-04
Loss = 1.5307e-01, PNorm = 65.8688, GNorm = 0.6757, lr_0 = 6.5920e-04
Loss = 1.5024e-01, PNorm = 65.8907, GNorm = 0.9555, lr_0 = 6.5875e-04
Loss = 1.4788e-01, PNorm = 65.9023, GNorm = 0.6631, lr_0 = 6.5830e-04
Loss = 1.6079e-01, PNorm = 65.9231, GNorm = 0.7738, lr_0 = 6.5785e-04
Loss = 2.0248e-01, PNorm = 65.9457, GNorm = 1.1931, lr_0 = 6.5740e-04
Loss = 1.5720e-01, PNorm = 65.9736, GNorm = 0.7926, lr_0 = 6.5695e-04
Loss = 1.6477e-01, PNorm = 65.9972, GNorm = 0.8937, lr_0 = 6.5650e-04
Loss = 1.4997e-01, PNorm = 66.0205, GNorm = 1.5747, lr_0 = 6.5605e-04
Loss = 1.5065e-01, PNorm = 66.0482, GNorm = 1.4281, lr_0 = 6.5560e-04
Loss = 1.4236e-01, PNorm = 66.0741, GNorm = 0.7828, lr_0 = 6.5515e-04
Loss = 1.5946e-01, PNorm = 66.0962, GNorm = 0.8737, lr_0 = 6.5470e-04
Loss = 1.4201e-01, PNorm = 66.1161, GNorm = 0.8024, lr_0 = 6.5425e-04
Loss = 1.6767e-01, PNorm = 66.1415, GNorm = 0.8841, lr_0 = 6.5380e-04
Loss = 1.6073e-01, PNorm = 66.1586, GNorm = 0.5413, lr_0 = 6.5335e-04
Loss = 1.4648e-01, PNorm = 66.1797, GNorm = 1.0231, lr_0 = 6.5291e-04
Loss = 1.4468e-01, PNorm = 66.2021, GNorm = 0.6638, lr_0 = 6.5246e-04
Loss = 1.4813e-01, PNorm = 66.2234, GNorm = 1.1358, lr_0 = 6.5201e-04
Loss = 1.4472e-01, PNorm = 66.2446, GNorm = 0.7856, lr_0 = 6.5157e-04
Loss = 1.6251e-01, PNorm = 66.2649, GNorm = 1.3050, lr_0 = 6.5112e-04
Loss = 1.3904e-01, PNorm = 66.2783, GNorm = 0.7785, lr_0 = 6.5067e-04
Loss = 1.4790e-01, PNorm = 66.2982, GNorm = 1.5181, lr_0 = 6.5023e-04
Loss = 1.6807e-01, PNorm = 66.3198, GNorm = 1.5977, lr_0 = 6.4978e-04
Loss = 1.6725e-01, PNorm = 66.3410, GNorm = 0.6802, lr_0 = 6.4934e-04
Loss = 1.5637e-01, PNorm = 66.3685, GNorm = 0.8559, lr_0 = 6.4889e-04
Loss = 1.5004e-01, PNorm = 66.3950, GNorm = 0.8134, lr_0 = 6.4845e-04
Loss = 1.7906e-01, PNorm = 66.4187, GNorm = 0.8437, lr_0 = 6.4800e-04
Loss = 1.6406e-01, PNorm = 66.4430, GNorm = 0.8057, lr_0 = 6.4756e-04
Loss = 1.6662e-01, PNorm = 66.4705, GNorm = 0.6239, lr_0 = 6.4712e-04
Loss = 1.4876e-01, PNorm = 66.4894, GNorm = 0.6957, lr_0 = 6.4667e-04
Loss = 1.4569e-01, PNorm = 66.5105, GNorm = 0.8969, lr_0 = 6.4623e-04
Loss = 1.5379e-01, PNorm = 66.5213, GNorm = 0.5925, lr_0 = 6.4579e-04
Loss = 1.7115e-01, PNorm = 66.5419, GNorm = 0.7293, lr_0 = 6.4534e-04
Loss = 1.8791e-01, PNorm = 66.5582, GNorm = 0.6748, lr_0 = 6.4490e-04
Loss = 1.6728e-01, PNorm = 66.5919, GNorm = 1.2194, lr_0 = 6.4446e-04
Loss = 1.8757e-01, PNorm = 66.6201, GNorm = 1.4547, lr_0 = 6.4402e-04
Loss = 1.8372e-01, PNorm = 66.6458, GNorm = 0.8473, lr_0 = 6.4358e-04
Loss = 1.4461e-01, PNorm = 66.6678, GNorm = 0.7414, lr_0 = 6.4314e-04
Loss = 1.5713e-01, PNorm = 66.6866, GNorm = 0.9420, lr_0 = 6.4270e-04
Loss = 1.6355e-01, PNorm = 66.7038, GNorm = 0.8971, lr_0 = 6.4226e-04
Loss = 1.4342e-01, PNorm = 66.7194, GNorm = 1.2970, lr_0 = 6.4182e-04
Loss = 1.6116e-01, PNorm = 66.7392, GNorm = 0.5573, lr_0 = 6.4138e-04
Loss = 1.6300e-01, PNorm = 66.7617, GNorm = 1.1025, lr_0 = 6.4094e-04
Loss = 1.5705e-01, PNorm = 66.7835, GNorm = 0.7225, lr_0 = 6.4050e-04
Loss = 1.6205e-01, PNorm = 66.8008, GNorm = 1.2428, lr_0 = 6.4006e-04
Loss = 1.4722e-01, PNorm = 66.8240, GNorm = 1.6092, lr_0 = 6.3962e-04
Loss = 1.5300e-01, PNorm = 66.8459, GNorm = 0.7189, lr_0 = 6.3918e-04
Loss = 1.4538e-01, PNorm = 66.8621, GNorm = 0.7175, lr_0 = 6.3874e-04
Loss = 1.4511e-01, PNorm = 66.8859, GNorm = 0.6466, lr_0 = 6.3831e-04
Loss = 1.2211e-01, PNorm = 66.9005, GNorm = 0.7296, lr_0 = 6.3787e-04
Loss = 1.7437e-01, PNorm = 66.9178, GNorm = 1.1530, lr_0 = 6.3743e-04
Loss = 1.5732e-01, PNorm = 66.9367, GNorm = 0.4030, lr_0 = 6.3700e-04
Loss = 1.5616e-01, PNorm = 66.9538, GNorm = 0.9726, lr_0 = 6.3656e-04
Loss = 1.5736e-01, PNorm = 66.9696, GNorm = 0.5921, lr_0 = 6.3612e-04
Loss = 1.7082e-01, PNorm = 66.9945, GNorm = 1.0413, lr_0 = 6.3569e-04
Loss = 1.3559e-01, PNorm = 67.0148, GNorm = 0.6137, lr_0 = 6.3525e-04
Loss = 1.6221e-01, PNorm = 67.0338, GNorm = 1.7684, lr_0 = 6.3482e-04
Loss = 1.5699e-01, PNorm = 67.0592, GNorm = 0.6771, lr_0 = 6.3438e-04
Loss = 1.4301e-01, PNorm = 67.0812, GNorm = 0.8421, lr_0 = 6.3395e-04
Loss = 1.5983e-01, PNorm = 67.0970, GNorm = 1.0659, lr_0 = 6.3351e-04
Loss = 1.6416e-01, PNorm = 67.1073, GNorm = 0.6416, lr_0 = 6.3308e-04
Loss = 1.7305e-01, PNorm = 67.1285, GNorm = 0.6469, lr_0 = 6.3265e-04
Loss = 1.7533e-01, PNorm = 67.1482, GNorm = 0.7468, lr_0 = 6.3221e-04
Loss = 1.6593e-01, PNorm = 67.1766, GNorm = 0.7887, lr_0 = 6.3178e-04
Loss = 1.5821e-01, PNorm = 67.1943, GNorm = 0.4996, lr_0 = 6.3135e-04
Loss = 1.4328e-01, PNorm = 67.2178, GNorm = 0.6193, lr_0 = 6.3091e-04
Loss = 1.6560e-01, PNorm = 67.2355, GNorm = 0.6973, lr_0 = 6.3048e-04
Loss = 1.5125e-01, PNorm = 67.2552, GNorm = 1.3942, lr_0 = 6.3005e-04
Loss = 1.5859e-01, PNorm = 67.2699, GNorm = 0.9737, lr_0 = 6.2962e-04
Loss = 1.6198e-01, PNorm = 67.2860, GNorm = 0.7772, lr_0 = 6.2919e-04
Loss = 1.4156e-01, PNorm = 67.3048, GNorm = 0.6830, lr_0 = 6.2876e-04
Loss = 1.5800e-01, PNorm = 67.3213, GNorm = 0.7642, lr_0 = 6.2833e-04
Loss = 1.6509e-01, PNorm = 67.3423, GNorm = 0.9723, lr_0 = 6.2789e-04
Loss = 1.4576e-01, PNorm = 67.3654, GNorm = 0.5160, lr_0 = 6.2746e-04
Loss = 1.4284e-01, PNorm = 67.3874, GNorm = 0.6030, lr_0 = 6.2703e-04
Loss = 1.7162e-01, PNorm = 67.4082, GNorm = 1.5570, lr_0 = 6.2661e-04
Loss = 1.5232e-01, PNorm = 67.4245, GNorm = 0.7850, lr_0 = 6.2618e-04
Loss = 1.7317e-01, PNorm = 67.4477, GNorm = 0.7581, lr_0 = 6.2575e-04
Loss = 1.6290e-01, PNorm = 67.4766, GNorm = 1.4201, lr_0 = 6.2532e-04
Loss = 1.8298e-01, PNorm = 67.4986, GNorm = 1.2380, lr_0 = 6.2489e-04
Loss = 1.5347e-01, PNorm = 67.5236, GNorm = 0.5742, lr_0 = 6.2446e-04
Loss = 1.2798e-01, PNorm = 67.5453, GNorm = 0.8546, lr_0 = 6.2403e-04
Loss = 1.5274e-01, PNorm = 67.5594, GNorm = 0.7119, lr_0 = 6.2361e-04
Loss = 1.5506e-01, PNorm = 67.5762, GNorm = 0.5684, lr_0 = 6.2318e-04
Loss = 1.5894e-01, PNorm = 67.5913, GNorm = 0.6980, lr_0 = 6.2275e-04
Loss = 1.7262e-01, PNorm = 67.6119, GNorm = 0.7237, lr_0 = 6.2233e-04
Loss = 1.4692e-01, PNorm = 67.6271, GNorm = 0.7587, lr_0 = 6.2190e-04
Loss = 1.4751e-01, PNorm = 67.6420, GNorm = 0.8923, lr_0 = 6.2147e-04
Loss = 1.6244e-01, PNorm = 67.6609, GNorm = 0.6143, lr_0 = 6.2105e-04
Loss = 1.4526e-01, PNorm = 67.6723, GNorm = 0.7061, lr_0 = 6.2062e-04
Loss = 1.6870e-01, PNorm = 67.6891, GNorm = 1.2923, lr_0 = 6.2020e-04
Loss = 1.5833e-01, PNorm = 67.6972, GNorm = 0.8350, lr_0 = 6.1977e-04
Loss = 1.5929e-01, PNorm = 67.7201, GNorm = 1.0409, lr_0 = 6.1935e-04
Loss = 1.7184e-01, PNorm = 67.7411, GNorm = 1.1673, lr_0 = 6.1892e-04
Loss = 1.6931e-01, PNorm = 67.7591, GNorm = 1.0547, lr_0 = 6.1850e-04
Loss = 1.5606e-01, PNorm = 67.7879, GNorm = 0.8477, lr_0 = 6.1808e-04
Loss = 1.4942e-01, PNorm = 67.8086, GNorm = 1.3183, lr_0 = 6.1765e-04
Loss = 1.3207e-01, PNorm = 67.8191, GNorm = 0.8227, lr_0 = 6.1723e-04
Loss = 1.5073e-01, PNorm = 67.8306, GNorm = 0.8369, lr_0 = 6.1681e-04
Loss = 1.6823e-01, PNorm = 67.8471, GNorm = 0.7916, lr_0 = 6.1638e-04
Loss = 1.5069e-01, PNorm = 67.8634, GNorm = 0.8469, lr_0 = 6.1596e-04
Loss = 1.4888e-01, PNorm = 67.8861, GNorm = 1.1349, lr_0 = 6.1554e-04
Loss = 1.3613e-01, PNorm = 67.9118, GNorm = 0.6532, lr_0 = 6.1512e-04
Loss = 1.5548e-01, PNorm = 67.9328, GNorm = 0.8607, lr_0 = 6.1470e-04
Loss = 1.4291e-01, PNorm = 67.9531, GNorm = 0.6611, lr_0 = 6.1428e-04
Loss = 1.7122e-01, PNorm = 67.9702, GNorm = 1.0470, lr_0 = 6.1385e-04
Loss = 1.6635e-01, PNorm = 67.9909, GNorm = 1.5186, lr_0 = 6.1343e-04
Loss = 1.7303e-01, PNorm = 68.0109, GNorm = 1.0095, lr_0 = 6.1301e-04
Loss = 1.7471e-01, PNorm = 68.0318, GNorm = 1.2076, lr_0 = 6.1259e-04
Loss = 1.5963e-01, PNorm = 68.0450, GNorm = 0.8943, lr_0 = 6.1217e-04
Loss = 1.3993e-01, PNorm = 68.0632, GNorm = 1.0955, lr_0 = 6.1175e-04
Loss = 1.5631e-01, PNorm = 68.0783, GNorm = 0.7539, lr_0 = 6.1134e-04
Loss = 1.5445e-01, PNorm = 68.0963, GNorm = 0.9156, lr_0 = 6.1092e-04
Loss = 1.6184e-01, PNorm = 68.1195, GNorm = 0.8840, lr_0 = 6.1050e-04
Validation mae = 0.243649
Epoch 8
Loss = 1.3887e-01, PNorm = 68.1416, GNorm = 0.5650, lr_0 = 6.1008e-04
Loss = 1.5516e-01, PNorm = 68.1592, GNorm = 0.9055, lr_0 = 6.0966e-04
Loss = 1.7154e-01, PNorm = 68.1798, GNorm = 1.0219, lr_0 = 6.0924e-04
Loss = 1.3085e-01, PNorm = 68.2016, GNorm = 0.8630, lr_0 = 6.0883e-04
Loss = 1.3306e-01, PNorm = 68.2218, GNorm = 0.8183, lr_0 = 6.0841e-04
Loss = 1.4951e-01, PNorm = 68.2396, GNorm = 0.5901, lr_0 = 6.0799e-04
Loss = 1.6157e-01, PNorm = 68.2603, GNorm = 0.8803, lr_0 = 6.0758e-04
Loss = 1.5143e-01, PNorm = 68.2795, GNorm = 0.6562, lr_0 = 6.0716e-04
Loss = 1.3355e-01, PNorm = 68.3015, GNorm = 0.5363, lr_0 = 6.0674e-04
Loss = 1.3403e-01, PNorm = 68.3133, GNorm = 0.7180, lr_0 = 6.0633e-04
Loss = 1.5472e-01, PNorm = 68.3328, GNorm = 1.0637, lr_0 = 6.0591e-04
Loss = 1.3108e-01, PNorm = 68.3481, GNorm = 0.6720, lr_0 = 6.0550e-04
Loss = 1.5275e-01, PNorm = 68.3756, GNorm = 1.1761, lr_0 = 6.0508e-04
Loss = 1.4414e-01, PNorm = 68.4048, GNorm = 0.7737, lr_0 = 6.0467e-04
Loss = 1.5020e-01, PNorm = 68.4239, GNorm = 0.7605, lr_0 = 6.0425e-04
Loss = 1.4547e-01, PNorm = 68.4385, GNorm = 0.8734, lr_0 = 6.0384e-04
Loss = 1.4029e-01, PNorm = 68.4524, GNorm = 0.6834, lr_0 = 6.0343e-04
Loss = 1.2435e-01, PNorm = 68.4618, GNorm = 0.6841, lr_0 = 6.0301e-04
Loss = 1.2558e-01, PNorm = 68.4761, GNorm = 0.5430, lr_0 = 6.0260e-04
Loss = 1.5401e-01, PNorm = 68.4985, GNorm = 0.6050, lr_0 = 6.0219e-04
Loss = 1.6498e-01, PNorm = 68.5191, GNorm = 1.8752, lr_0 = 6.0178e-04
Loss = 1.8146e-01, PNorm = 68.5451, GNorm = 1.0441, lr_0 = 6.0136e-04
Loss = 1.4498e-01, PNorm = 68.5694, GNorm = 0.7594, lr_0 = 6.0095e-04
Loss = 1.5945e-01, PNorm = 68.5883, GNorm = 0.8784, lr_0 = 6.0054e-04
Loss = 1.6195e-01, PNorm = 68.6171, GNorm = 1.8936, lr_0 = 6.0013e-04
Loss = 1.3156e-01, PNorm = 68.6395, GNorm = 0.8407, lr_0 = 5.9972e-04
Loss = 1.4269e-01, PNorm = 68.6556, GNorm = 0.9501, lr_0 = 5.9931e-04
Loss = 1.6219e-01, PNorm = 68.6767, GNorm = 0.6483, lr_0 = 5.9890e-04
Loss = 1.6678e-01, PNorm = 68.7005, GNorm = 0.9615, lr_0 = 5.9849e-04
Loss = 1.3430e-01, PNorm = 68.7234, GNorm = 0.5729, lr_0 = 5.9808e-04
Loss = 1.4511e-01, PNorm = 68.7385, GNorm = 0.8289, lr_0 = 5.9767e-04
Loss = 1.5844e-01, PNorm = 68.7558, GNorm = 0.5219, lr_0 = 5.9726e-04
Loss = 1.5346e-01, PNorm = 68.7741, GNorm = 0.6960, lr_0 = 5.9685e-04
Loss = 1.3795e-01, PNorm = 68.7851, GNorm = 0.9208, lr_0 = 5.9644e-04
Loss = 1.6029e-01, PNorm = 68.8032, GNorm = 0.7435, lr_0 = 5.9603e-04
Loss = 1.3818e-01, PNorm = 68.8202, GNorm = 0.7788, lr_0 = 5.9562e-04
Loss = 1.3600e-01, PNorm = 68.8398, GNorm = 0.7676, lr_0 = 5.9521e-04
Loss = 1.4241e-01, PNorm = 68.8519, GNorm = 0.5392, lr_0 = 5.9481e-04
Loss = 1.3306e-01, PNorm = 68.8684, GNorm = 1.0850, lr_0 = 5.9440e-04
Loss = 1.4654e-01, PNorm = 68.8805, GNorm = 0.6681, lr_0 = 5.9399e-04
Loss = 1.4966e-01, PNorm = 68.9005, GNorm = 1.1401, lr_0 = 5.9358e-04
Loss = 1.4960e-01, PNorm = 68.9176, GNorm = 0.6166, lr_0 = 5.9318e-04
Loss = 1.4799e-01, PNorm = 68.9386, GNorm = 0.5645, lr_0 = 5.9277e-04
Loss = 1.4293e-01, PNorm = 68.9571, GNorm = 0.8718, lr_0 = 5.9236e-04
Loss = 1.3868e-01, PNorm = 68.9727, GNorm = 1.1119, lr_0 = 5.9196e-04
Loss = 1.3451e-01, PNorm = 68.9881, GNorm = 0.5051, lr_0 = 5.9155e-04
Loss = 1.4191e-01, PNorm = 69.0057, GNorm = 0.6970, lr_0 = 5.9115e-04
Loss = 1.6698e-01, PNorm = 69.0254, GNorm = 0.8863, lr_0 = 5.9074e-04
Loss = 1.4770e-01, PNorm = 69.0399, GNorm = 0.5793, lr_0 = 5.9034e-04
Loss = 1.5529e-01, PNorm = 69.0570, GNorm = 0.5852, lr_0 = 5.8993e-04
Loss = 1.5785e-01, PNorm = 69.0812, GNorm = 0.8605, lr_0 = 5.8953e-04
Loss = 1.6487e-01, PNorm = 69.1007, GNorm = 0.6091, lr_0 = 5.8913e-04
Loss = 1.4625e-01, PNorm = 69.1238, GNorm = 0.6809, lr_0 = 5.8872e-04
Loss = 1.5104e-01, PNorm = 69.1424, GNorm = 0.6180, lr_0 = 5.8832e-04
Loss = 1.3873e-01, PNorm = 69.1619, GNorm = 0.4613, lr_0 = 5.8792e-04
Loss = 1.5018e-01, PNorm = 69.1823, GNorm = 0.5710, lr_0 = 5.8751e-04
Loss = 1.4594e-01, PNorm = 69.1996, GNorm = 0.6658, lr_0 = 5.8711e-04
Loss = 1.3887e-01, PNorm = 69.2163, GNorm = 0.8724, lr_0 = 5.8671e-04
Loss = 1.3991e-01, PNorm = 69.2405, GNorm = 1.7107, lr_0 = 5.8631e-04
Loss = 1.7824e-01, PNorm = 69.2581, GNorm = 1.1818, lr_0 = 5.8591e-04
Loss = 1.4020e-01, PNorm = 69.2866, GNorm = 0.7662, lr_0 = 5.8550e-04
Loss = 1.6415e-01, PNorm = 69.3043, GNorm = 0.5710, lr_0 = 5.8510e-04
Loss = 1.4077e-01, PNorm = 69.3224, GNorm = 0.5039, lr_0 = 5.8470e-04
Loss = 1.4742e-01, PNorm = 69.3429, GNorm = 0.6530, lr_0 = 5.8430e-04
Loss = 1.4098e-01, PNorm = 69.3635, GNorm = 0.7046, lr_0 = 5.8390e-04
Loss = 1.6715e-01, PNorm = 69.3835, GNorm = 1.0325, lr_0 = 5.8350e-04
Loss = 1.4281e-01, PNorm = 69.4040, GNorm = 0.7176, lr_0 = 5.8310e-04
Loss = 1.5699e-01, PNorm = 69.4231, GNorm = 0.9443, lr_0 = 5.8270e-04
Loss = 1.5668e-01, PNorm = 69.4419, GNorm = 0.8934, lr_0 = 5.8230e-04
Loss = 1.5197e-01, PNorm = 69.4594, GNorm = 0.6720, lr_0 = 5.8190e-04
Loss = 1.3800e-01, PNorm = 69.4789, GNorm = 1.0577, lr_0 = 5.8151e-04
Loss = 1.4888e-01, PNorm = 69.4987, GNorm = 1.1729, lr_0 = 5.8111e-04
Loss = 1.4352e-01, PNorm = 69.5158, GNorm = 0.7351, lr_0 = 5.8071e-04
Loss = 1.4596e-01, PNorm = 69.5273, GNorm = 0.6597, lr_0 = 5.8031e-04
Loss = 1.4589e-01, PNorm = 69.5431, GNorm = 0.6935, lr_0 = 5.7991e-04
Loss = 1.6018e-01, PNorm = 69.5537, GNorm = 0.5142, lr_0 = 5.7952e-04
Loss = 1.4965e-01, PNorm = 69.5691, GNorm = 0.5552, lr_0 = 5.7912e-04
Loss = 1.4933e-01, PNorm = 69.5952, GNorm = 0.6415, lr_0 = 5.7872e-04
Loss = 1.5411e-01, PNorm = 69.6154, GNorm = 0.9803, lr_0 = 5.7833e-04
Loss = 1.3839e-01, PNorm = 69.6308, GNorm = 0.8209, lr_0 = 5.7793e-04
Loss = 1.4120e-01, PNorm = 69.6430, GNorm = 0.7789, lr_0 = 5.7753e-04
Loss = 1.2785e-01, PNorm = 69.6586, GNorm = 0.5578, lr_0 = 5.7714e-04
Loss = 1.2634e-01, PNorm = 69.6766, GNorm = 1.2107, lr_0 = 5.7674e-04
Loss = 1.4132e-01, PNorm = 69.6907, GNorm = 0.4964, lr_0 = 5.7635e-04
Loss = 1.4268e-01, PNorm = 69.7085, GNorm = 0.7792, lr_0 = 5.7595e-04
Loss = 1.4942e-01, PNorm = 69.7267, GNorm = 1.1576, lr_0 = 5.7556e-04
Loss = 1.5018e-01, PNorm = 69.7435, GNorm = 0.7454, lr_0 = 5.7516e-04
Loss = 1.5019e-01, PNorm = 69.7586, GNorm = 0.7509, lr_0 = 5.7477e-04
Loss = 1.3501e-01, PNorm = 69.7729, GNorm = 0.8786, lr_0 = 5.7438e-04
Loss = 1.6041e-01, PNorm = 69.7873, GNorm = 0.9315, lr_0 = 5.7398e-04
Loss = 1.4563e-01, PNorm = 69.8003, GNorm = 0.8340, lr_0 = 5.7359e-04
Loss = 1.4725e-01, PNorm = 69.8139, GNorm = 0.8020, lr_0 = 5.7320e-04
Loss = 1.1757e-01, PNorm = 69.8280, GNorm = 0.5859, lr_0 = 5.7280e-04
Loss = 1.3619e-01, PNorm = 69.8443, GNorm = 0.6321, lr_0 = 5.7241e-04
Loss = 1.4834e-01, PNorm = 69.8674, GNorm = 0.8210, lr_0 = 5.7202e-04
Loss = 1.4398e-01, PNorm = 69.8799, GNorm = 0.8610, lr_0 = 5.7163e-04
Loss = 1.4625e-01, PNorm = 69.8973, GNorm = 0.5958, lr_0 = 5.7124e-04
Loss = 1.4339e-01, PNorm = 69.9112, GNorm = 0.6921, lr_0 = 5.7084e-04
Loss = 1.6156e-01, PNorm = 69.9266, GNorm = 0.6116, lr_0 = 5.7045e-04
Loss = 1.5308e-01, PNorm = 69.9448, GNorm = 0.4227, lr_0 = 5.7006e-04
Loss = 1.4752e-01, PNorm = 69.9654, GNorm = 0.6100, lr_0 = 5.6967e-04
Loss = 1.4264e-01, PNorm = 69.9852, GNorm = 0.7641, lr_0 = 5.6928e-04
Loss = 1.4349e-01, PNorm = 69.9997, GNorm = 0.7748, lr_0 = 5.6889e-04
Loss = 1.4886e-01, PNorm = 70.0200, GNorm = 0.6119, lr_0 = 5.6850e-04
Loss = 1.2907e-01, PNorm = 70.0371, GNorm = 0.6633, lr_0 = 5.6811e-04
Loss = 1.2730e-01, PNorm = 70.0526, GNorm = 0.9435, lr_0 = 5.6772e-04
Loss = 1.6209e-01, PNorm = 70.0665, GNorm = 1.5353, lr_0 = 5.6733e-04
Loss = 1.6051e-01, PNorm = 70.0794, GNorm = 0.8526, lr_0 = 5.6695e-04
Loss = 1.4153e-01, PNorm = 70.0968, GNorm = 0.9585, lr_0 = 5.6656e-04
Loss = 1.4029e-01, PNorm = 70.1165, GNorm = 0.5677, lr_0 = 5.6617e-04
Loss = 1.6119e-01, PNorm = 70.1432, GNorm = 1.2587, lr_0 = 5.6578e-04
Loss = 1.7312e-01, PNorm = 70.1596, GNorm = 0.8674, lr_0 = 5.6539e-04
Loss = 1.5323e-01, PNorm = 70.1798, GNorm = 1.1891, lr_0 = 5.6501e-04
Loss = 1.4384e-01, PNorm = 70.1974, GNorm = 1.1877, lr_0 = 5.6462e-04
Loss = 1.5708e-01, PNorm = 70.2126, GNorm = 2.1672, lr_0 = 5.6423e-04
Loss = 1.5141e-01, PNorm = 70.2307, GNorm = 1.2447, lr_0 = 5.6385e-04
Loss = 1.4217e-01, PNorm = 70.2522, GNorm = 0.9749, lr_0 = 5.6346e-04
Loss = 1.4977e-01, PNorm = 70.2730, GNorm = 0.9721, lr_0 = 5.6307e-04
Loss = 1.5798e-01, PNorm = 70.2884, GNorm = 1.2676, lr_0 = 5.6269e-04
Loss = 1.5074e-01, PNorm = 70.3088, GNorm = 0.6793, lr_0 = 5.6230e-04
Validation mae = 0.241163
Epoch 9
Loss = 1.4247e-01, PNorm = 70.3272, GNorm = 1.2460, lr_0 = 5.6192e-04
Loss = 1.2694e-01, PNorm = 70.3424, GNorm = 0.9351, lr_0 = 5.6153e-04
Loss = 1.2951e-01, PNorm = 70.3611, GNorm = 0.5065, lr_0 = 5.6115e-04
Loss = 1.4069e-01, PNorm = 70.3776, GNorm = 0.6599, lr_0 = 5.6076e-04
Loss = 1.5642e-01, PNorm = 70.3959, GNorm = 1.4064, lr_0 = 5.6038e-04
Loss = 1.5076e-01, PNorm = 70.4177, GNorm = 1.2211, lr_0 = 5.6000e-04
Loss = 1.4315e-01, PNorm = 70.4343, GNorm = 0.5472, lr_0 = 5.5961e-04
Loss = 1.3413e-01, PNorm = 70.4548, GNorm = 1.5380, lr_0 = 5.5923e-04
Loss = 1.3378e-01, PNorm = 70.4773, GNorm = 1.0558, lr_0 = 5.5885e-04
Loss = 1.5237e-01, PNorm = 70.5020, GNorm = 0.6670, lr_0 = 5.5846e-04
Loss = 1.3692e-01, PNorm = 70.5208, GNorm = 0.5255, lr_0 = 5.5808e-04
Loss = 1.4041e-01, PNorm = 70.5319, GNorm = 0.7218, lr_0 = 5.5770e-04
Loss = 1.3192e-01, PNorm = 70.5507, GNorm = 0.7268, lr_0 = 5.5732e-04
Loss = 1.2909e-01, PNorm = 70.5716, GNorm = 0.4694, lr_0 = 5.5693e-04
Loss = 1.3324e-01, PNorm = 70.5845, GNorm = 0.5033, lr_0 = 5.5655e-04
Loss = 1.3093e-01, PNorm = 70.5977, GNorm = 0.7663, lr_0 = 5.5617e-04
Loss = 1.4827e-01, PNorm = 70.6166, GNorm = 0.8454, lr_0 = 5.5579e-04
Loss = 1.4262e-01, PNorm = 70.6384, GNorm = 0.7432, lr_0 = 5.5541e-04
Loss = 1.3557e-01, PNorm = 70.6534, GNorm = 0.6075, lr_0 = 5.5503e-04
Loss = 1.3555e-01, PNorm = 70.6628, GNorm = 0.7200, lr_0 = 5.5465e-04
Loss = 1.4196e-01, PNorm = 70.6757, GNorm = 1.0991, lr_0 = 5.5427e-04
Loss = 1.5343e-01, PNorm = 70.6891, GNorm = 0.8458, lr_0 = 5.5389e-04
Loss = 1.2827e-01, PNorm = 70.7033, GNorm = 0.7853, lr_0 = 5.5351e-04
Loss = 1.3750e-01, PNorm = 70.7133, GNorm = 1.1692, lr_0 = 5.5313e-04
Loss = 1.3987e-01, PNorm = 70.7357, GNorm = 0.8713, lr_0 = 5.5275e-04
Loss = 1.6185e-01, PNorm = 70.7519, GNorm = 0.9284, lr_0 = 5.5237e-04
Loss = 1.4140e-01, PNorm = 70.7724, GNorm = 1.0729, lr_0 = 5.5199e-04
Loss = 1.4286e-01, PNorm = 70.7950, GNorm = 0.5382, lr_0 = 5.5162e-04
Loss = 1.4663e-01, PNorm = 70.8065, GNorm = 0.6885, lr_0 = 5.5124e-04
Loss = 1.4937e-01, PNorm = 70.8185, GNorm = 0.7899, lr_0 = 5.5086e-04
Loss = 1.4876e-01, PNorm = 70.8358, GNorm = 0.8866, lr_0 = 5.5048e-04
Loss = 1.5576e-01, PNorm = 70.8565, GNorm = 1.1313, lr_0 = 5.5011e-04
Loss = 1.5283e-01, PNorm = 70.8799, GNorm = 0.7340, lr_0 = 5.4973e-04
Loss = 1.4519e-01, PNorm = 70.8971, GNorm = 0.9059, lr_0 = 5.4935e-04
Loss = 1.4500e-01, PNorm = 70.9131, GNorm = 0.7643, lr_0 = 5.4898e-04
Loss = 1.4219e-01, PNorm = 70.9295, GNorm = 1.2801, lr_0 = 5.4860e-04
Loss = 1.2826e-01, PNorm = 70.9468, GNorm = 0.8675, lr_0 = 5.4822e-04
Loss = 1.5164e-01, PNorm = 70.9650, GNorm = 0.8393, lr_0 = 5.4785e-04
Loss = 1.5400e-01, PNorm = 70.9798, GNorm = 0.5746, lr_0 = 5.4747e-04
Loss = 1.3503e-01, PNorm = 70.9914, GNorm = 0.9791, lr_0 = 5.4710e-04
Loss = 1.5471e-01, PNorm = 71.0105, GNorm = 0.9933, lr_0 = 5.4672e-04
Loss = 1.5651e-01, PNorm = 71.0268, GNorm = 0.8111, lr_0 = 5.4635e-04
Loss = 1.4464e-01, PNorm = 71.0391, GNorm = 0.4866, lr_0 = 5.4597e-04
Loss = 1.5225e-01, PNorm = 71.0589, GNorm = 1.3879, lr_0 = 5.4560e-04
Loss = 1.4003e-01, PNorm = 71.0797, GNorm = 0.9993, lr_0 = 5.4523e-04
Loss = 1.3875e-01, PNorm = 71.0982, GNorm = 0.8761, lr_0 = 5.4485e-04
Loss = 1.3609e-01, PNorm = 71.1153, GNorm = 0.8863, lr_0 = 5.4448e-04
Loss = 1.3547e-01, PNorm = 71.1411, GNorm = 0.6883, lr_0 = 5.4411e-04
Loss = 1.4253e-01, PNorm = 71.1620, GNorm = 0.6757, lr_0 = 5.4373e-04
Loss = 1.2944e-01, PNorm = 71.1758, GNorm = 0.4211, lr_0 = 5.4336e-04
Loss = 1.4433e-01, PNorm = 71.1897, GNorm = 0.6833, lr_0 = 5.4299e-04
Loss = 1.4193e-01, PNorm = 71.2083, GNorm = 0.7268, lr_0 = 5.4262e-04
Loss = 1.3708e-01, PNorm = 71.2165, GNorm = 0.6789, lr_0 = 5.4225e-04
Loss = 1.4837e-01, PNorm = 71.2340, GNorm = 0.9508, lr_0 = 5.4187e-04
Loss = 1.3141e-01, PNorm = 71.2502, GNorm = 0.5467, lr_0 = 5.4150e-04
Loss = 1.2481e-01, PNorm = 71.2693, GNorm = 0.8627, lr_0 = 5.4113e-04
Loss = 1.2661e-01, PNorm = 71.2853, GNorm = 0.8108, lr_0 = 5.4076e-04
Loss = 1.4588e-01, PNorm = 71.3032, GNorm = 0.8789, lr_0 = 5.4039e-04
Loss = 1.6205e-01, PNorm = 71.3189, GNorm = 0.7233, lr_0 = 5.4002e-04
Loss = 1.5491e-01, PNorm = 71.3380, GNorm = 0.7254, lr_0 = 5.3965e-04
Loss = 1.3693e-01, PNorm = 71.3498, GNorm = 0.7976, lr_0 = 5.3928e-04
Loss = 1.3789e-01, PNorm = 71.3659, GNorm = 0.6185, lr_0 = 5.3891e-04
Loss = 1.5123e-01, PNorm = 71.3812, GNorm = 0.8214, lr_0 = 5.3854e-04
Loss = 1.4362e-01, PNorm = 71.3946, GNorm = 1.0260, lr_0 = 5.3817e-04
Loss = 1.3986e-01, PNorm = 71.4117, GNorm = 0.8310, lr_0 = 5.3781e-04
Loss = 1.4661e-01, PNorm = 71.4265, GNorm = 0.5891, lr_0 = 5.3744e-04
Loss = 1.3679e-01, PNorm = 71.4433, GNorm = 0.7339, lr_0 = 5.3707e-04
Loss = 1.3740e-01, PNorm = 71.4564, GNorm = 0.6409, lr_0 = 5.3670e-04
Loss = 1.4346e-01, PNorm = 71.4744, GNorm = 0.5433, lr_0 = 5.3633e-04
Loss = 1.4406e-01, PNorm = 71.4910, GNorm = 1.1621, lr_0 = 5.3597e-04
Loss = 1.3510e-01, PNorm = 71.5088, GNorm = 0.7034, lr_0 = 5.3560e-04
Loss = 1.4193e-01, PNorm = 71.5228, GNorm = 0.9876, lr_0 = 5.3523e-04
Loss = 1.5215e-01, PNorm = 71.5413, GNorm = 0.6925, lr_0 = 5.3486e-04
Loss = 1.7477e-01, PNorm = 71.5592, GNorm = 0.7761, lr_0 = 5.3450e-04
Loss = 1.4188e-01, PNorm = 71.5811, GNorm = 0.9738, lr_0 = 5.3413e-04
Loss = 1.4335e-01, PNorm = 71.5963, GNorm = 0.6063, lr_0 = 5.3377e-04
Loss = 1.3945e-01, PNorm = 71.6174, GNorm = 0.8263, lr_0 = 5.3340e-04
Loss = 1.3200e-01, PNorm = 71.6326, GNorm = 0.7652, lr_0 = 5.3304e-04
Loss = 1.2759e-01, PNorm = 71.6498, GNorm = 0.6128, lr_0 = 5.3267e-04
Loss = 1.3745e-01, PNorm = 71.6620, GNorm = 0.8727, lr_0 = 5.3231e-04
Loss = 1.4928e-01, PNorm = 71.6783, GNorm = 1.1632, lr_0 = 5.3194e-04
Loss = 1.3469e-01, PNorm = 71.6990, GNorm = 0.6942, lr_0 = 5.3158e-04
Loss = 1.4280e-01, PNorm = 71.7214, GNorm = 0.7702, lr_0 = 5.3121e-04
Loss = 1.2731e-01, PNorm = 71.7389, GNorm = 0.4909, lr_0 = 5.3085e-04
Loss = 1.5748e-01, PNorm = 71.7634, GNorm = 0.7779, lr_0 = 5.3048e-04
Loss = 1.4343e-01, PNorm = 71.7802, GNorm = 0.7818, lr_0 = 5.3012e-04
Loss = 1.3691e-01, PNorm = 71.8026, GNorm = 0.7898, lr_0 = 5.2976e-04
Loss = 1.4510e-01, PNorm = 71.8170, GNorm = 0.8341, lr_0 = 5.2939e-04
Loss = 1.5165e-01, PNorm = 71.8321, GNorm = 0.6855, lr_0 = 5.2903e-04
Loss = 1.2804e-01, PNorm = 71.8478, GNorm = 0.7457, lr_0 = 5.2867e-04
Loss = 1.4983e-01, PNorm = 71.8637, GNorm = 0.9507, lr_0 = 5.2831e-04
Loss = 1.3939e-01, PNorm = 71.8817, GNorm = 0.5575, lr_0 = 5.2795e-04
Loss = 1.5322e-01, PNorm = 71.8994, GNorm = 0.8100, lr_0 = 5.2758e-04
Loss = 1.4153e-01, PNorm = 71.9134, GNorm = 0.7567, lr_0 = 5.2722e-04
Loss = 1.4301e-01, PNorm = 71.9282, GNorm = 0.7636, lr_0 = 5.2686e-04
Loss = 1.2210e-01, PNorm = 71.9396, GNorm = 1.4255, lr_0 = 5.2650e-04
Loss = 1.4265e-01, PNorm = 71.9495, GNorm = 0.6140, lr_0 = 5.2614e-04
Loss = 1.2321e-01, PNorm = 71.9677, GNorm = 0.7472, lr_0 = 5.2578e-04
Loss = 1.4980e-01, PNorm = 71.9851, GNorm = 0.7175, lr_0 = 5.2542e-04
Loss = 1.4965e-01, PNorm = 72.0060, GNorm = 0.7069, lr_0 = 5.2506e-04
Loss = 1.3721e-01, PNorm = 72.0255, GNorm = 0.5069, lr_0 = 5.2470e-04
Loss = 1.3047e-01, PNorm = 72.0457, GNorm = 0.7081, lr_0 = 5.2434e-04
Loss = 1.2312e-01, PNorm = 72.0639, GNorm = 0.6053, lr_0 = 5.2398e-04
Loss = 1.4590e-01, PNorm = 72.0840, GNorm = 0.6424, lr_0 = 5.2362e-04
Loss = 1.5857e-01, PNorm = 72.0932, GNorm = 1.2762, lr_0 = 5.2326e-04
Loss = 1.4291e-01, PNorm = 72.1080, GNorm = 1.8750, lr_0 = 5.2290e-04
Loss = 1.5310e-01, PNorm = 72.1165, GNorm = 0.8534, lr_0 = 5.2255e-04
Loss = 1.5269e-01, PNorm = 72.1309, GNorm = 0.5782, lr_0 = 5.2219e-04
Loss = 1.3942e-01, PNorm = 72.1460, GNorm = 0.6419, lr_0 = 5.2183e-04
Loss = 1.3408e-01, PNorm = 72.1665, GNorm = 0.8216, lr_0 = 5.2147e-04
Loss = 1.3188e-01, PNorm = 72.1806, GNorm = 0.7757, lr_0 = 5.2112e-04
Loss = 1.2327e-01, PNorm = 72.1979, GNorm = 0.9052, lr_0 = 5.2076e-04
Loss = 1.7024e-01, PNorm = 72.2008, GNorm = 0.5926, lr_0 = 5.2040e-04
Loss = 1.2402e-01, PNorm = 72.2078, GNorm = 1.0019, lr_0 = 5.2005e-04
Loss = 1.4914e-01, PNorm = 72.2185, GNorm = 0.7032, lr_0 = 5.1969e-04
Loss = 1.3362e-01, PNorm = 72.2334, GNorm = 0.7009, lr_0 = 5.1933e-04
Loss = 1.3005e-01, PNorm = 72.2525, GNorm = 0.6211, lr_0 = 5.1898e-04
Loss = 1.3859e-01, PNorm = 72.2702, GNorm = 0.5957, lr_0 = 5.1862e-04
Loss = 1.5010e-01, PNorm = 72.2838, GNorm = 1.4450, lr_0 = 5.1827e-04
Loss = 1.3039e-01, PNorm = 72.3030, GNorm = 0.6135, lr_0 = 5.1791e-04
Validation mae = 0.246715
Epoch 10
Loss = 1.3517e-01, PNorm = 72.3128, GNorm = 0.5630, lr_0 = 5.1756e-04
Loss = 1.4543e-01, PNorm = 72.3290, GNorm = 0.7971, lr_0 = 5.1720e-04
Loss = 1.1104e-01, PNorm = 72.3462, GNorm = 0.4548, lr_0 = 5.1685e-04
Loss = 1.1794e-01, PNorm = 72.3601, GNorm = 0.6539, lr_0 = 5.1649e-04
Loss = 1.3441e-01, PNorm = 72.3717, GNorm = 0.8077, lr_0 = 5.1614e-04
Loss = 1.2668e-01, PNorm = 72.3846, GNorm = 0.7574, lr_0 = 5.1579e-04
Loss = 1.1253e-01, PNorm = 72.4033, GNorm = 0.4349, lr_0 = 5.1543e-04
Loss = 1.2727e-01, PNorm = 72.4233, GNorm = 0.8998, lr_0 = 5.1508e-04
Loss = 1.3134e-01, PNorm = 72.4385, GNorm = 0.3892, lr_0 = 5.1473e-04
Loss = 1.3317e-01, PNorm = 72.4468, GNorm = 1.0992, lr_0 = 5.1437e-04
Loss = 1.5673e-01, PNorm = 72.4581, GNorm = 1.2631, lr_0 = 5.1402e-04
Loss = 1.3086e-01, PNorm = 72.4714, GNorm = 0.7230, lr_0 = 5.1367e-04
Loss = 1.2171e-01, PNorm = 72.4888, GNorm = 0.7887, lr_0 = 5.1332e-04
Loss = 1.4100e-01, PNorm = 72.5038, GNorm = 1.0654, lr_0 = 5.1297e-04
Loss = 1.2570e-01, PNorm = 72.5195, GNorm = 0.7662, lr_0 = 5.1262e-04
Loss = 1.4086e-01, PNorm = 72.5317, GNorm = 0.9055, lr_0 = 5.1226e-04
Loss = 1.3017e-01, PNorm = 72.5447, GNorm = 0.5685, lr_0 = 5.1191e-04
Loss = 1.2761e-01, PNorm = 72.5571, GNorm = 0.7648, lr_0 = 5.1156e-04
Loss = 1.5236e-01, PNorm = 72.5681, GNorm = 1.4205, lr_0 = 5.1121e-04
Loss = 1.1868e-01, PNorm = 72.5814, GNorm = 0.5513, lr_0 = 5.1086e-04
Loss = 1.2823e-01, PNorm = 72.6004, GNorm = 0.5666, lr_0 = 5.1051e-04
Loss = 1.2004e-01, PNorm = 72.6171, GNorm = 0.8815, lr_0 = 5.1016e-04
Loss = 1.3415e-01, PNorm = 72.6363, GNorm = 0.5393, lr_0 = 5.0981e-04
Loss = 1.3755e-01, PNorm = 72.6478, GNorm = 0.4882, lr_0 = 5.0946e-04
Loss = 1.5162e-01, PNorm = 72.6622, GNorm = 0.9721, lr_0 = 5.0911e-04
Loss = 1.4002e-01, PNorm = 72.6768, GNorm = 0.6280, lr_0 = 5.0877e-04
Loss = 1.5164e-01, PNorm = 72.6920, GNorm = 0.9937, lr_0 = 5.0842e-04
Loss = 1.3895e-01, PNorm = 72.7142, GNorm = 0.6102, lr_0 = 5.0807e-04
Loss = 1.3755e-01, PNorm = 72.7354, GNorm = 0.9258, lr_0 = 5.0772e-04
Loss = 1.2840e-01, PNorm = 72.7512, GNorm = 0.7589, lr_0 = 5.0737e-04
Loss = 1.2531e-01, PNorm = 72.7655, GNorm = 0.7558, lr_0 = 5.0703e-04
Loss = 1.2474e-01, PNorm = 72.7801, GNorm = 0.7690, lr_0 = 5.0668e-04
Loss = 1.2746e-01, PNorm = 72.7969, GNorm = 0.4279, lr_0 = 5.0633e-04
Loss = 1.3648e-01, PNorm = 72.8100, GNorm = 0.8011, lr_0 = 5.0598e-04
Loss = 1.2021e-01, PNorm = 72.8256, GNorm = 0.9199, lr_0 = 5.0564e-04
Loss = 1.2657e-01, PNorm = 72.8386, GNorm = 0.6683, lr_0 = 5.0529e-04
Loss = 1.3299e-01, PNorm = 72.8538, GNorm = 0.9115, lr_0 = 5.0494e-04
Loss = 1.4523e-01, PNorm = 72.8682, GNorm = 0.7677, lr_0 = 5.0460e-04
Loss = 1.1120e-01, PNorm = 72.8842, GNorm = 0.6370, lr_0 = 5.0425e-04
Loss = 1.4712e-01, PNorm = 72.8931, GNorm = 0.7225, lr_0 = 5.0391e-04
Loss = 1.3775e-01, PNorm = 72.9069, GNorm = 0.7679, lr_0 = 5.0356e-04
Loss = 1.2624e-01, PNorm = 72.9163, GNorm = 0.6548, lr_0 = 5.0322e-04
Loss = 1.3905e-01, PNorm = 72.9338, GNorm = 0.5854, lr_0 = 5.0287e-04
Loss = 1.4529e-01, PNorm = 72.9523, GNorm = 1.0641, lr_0 = 5.0253e-04
Loss = 1.3536e-01, PNorm = 72.9723, GNorm = 0.6136, lr_0 = 5.0218e-04
Loss = 1.3743e-01, PNorm = 72.9890, GNorm = 0.8682, lr_0 = 5.0184e-04
Loss = 1.1591e-01, PNorm = 73.0103, GNorm = 0.8643, lr_0 = 5.0150e-04
Loss = 1.3022e-01, PNorm = 73.0174, GNorm = 0.5814, lr_0 = 5.0115e-04
Loss = 1.2575e-01, PNorm = 73.0224, GNorm = 0.8773, lr_0 = 5.0081e-04
Loss = 1.1421e-01, PNorm = 73.0318, GNorm = 0.8341, lr_0 = 5.0047e-04
Loss = 1.4074e-01, PNorm = 73.0448, GNorm = 0.8947, lr_0 = 5.0012e-04
Loss = 1.3191e-01, PNorm = 73.0590, GNorm = 0.6764, lr_0 = 4.9978e-04
Loss = 1.6413e-01, PNorm = 73.0803, GNorm = 1.0634, lr_0 = 4.9944e-04
Loss = 1.3517e-01, PNorm = 73.0964, GNorm = 0.9340, lr_0 = 4.9910e-04
Loss = 1.3081e-01, PNorm = 73.1137, GNorm = 0.7407, lr_0 = 4.9875e-04
Loss = 1.4120e-01, PNorm = 73.1293, GNorm = 0.7336, lr_0 = 4.9841e-04
Loss = 1.1293e-01, PNorm = 73.1435, GNorm = 0.8708, lr_0 = 4.9807e-04
Loss = 1.3591e-01, PNorm = 73.1639, GNorm = 0.5686, lr_0 = 4.9773e-04
Loss = 1.3878e-01, PNorm = 73.1781, GNorm = 0.6754, lr_0 = 4.9739e-04
Loss = 1.4205e-01, PNorm = 73.1946, GNorm = 1.0555, lr_0 = 4.9705e-04
Loss = 1.5444e-01, PNorm = 73.2096, GNorm = 1.2976, lr_0 = 4.9671e-04
Loss = 1.4234e-01, PNorm = 73.2244, GNorm = 0.6981, lr_0 = 4.9637e-04
Loss = 1.4479e-01, PNorm = 73.2435, GNorm = 0.6581, lr_0 = 4.9603e-04
Loss = 1.3196e-01, PNorm = 73.2567, GNorm = 0.7002, lr_0 = 4.9569e-04
Loss = 1.3605e-01, PNorm = 73.2722, GNorm = 1.0335, lr_0 = 4.9535e-04
Loss = 1.2950e-01, PNorm = 73.2817, GNorm = 0.8272, lr_0 = 4.9501e-04
Loss = 1.3118e-01, PNorm = 73.2893, GNorm = 0.6613, lr_0 = 4.9467e-04
Loss = 1.2443e-01, PNorm = 73.3031, GNorm = 0.4617, lr_0 = 4.9433e-04
Loss = 1.3633e-01, PNorm = 73.3211, GNorm = 0.8571, lr_0 = 4.9399e-04
Loss = 1.1992e-01, PNorm = 73.3376, GNorm = 0.4644, lr_0 = 4.9365e-04
Loss = 1.3398e-01, PNorm = 73.3513, GNorm = 0.6263, lr_0 = 4.9332e-04
Loss = 1.3606e-01, PNorm = 73.3655, GNorm = 0.8187, lr_0 = 4.9298e-04
Loss = 1.2612e-01, PNorm = 73.3795, GNorm = 0.9287, lr_0 = 4.9264e-04
Loss = 1.2337e-01, PNorm = 73.3995, GNorm = 1.3397, lr_0 = 4.9230e-04
Loss = 1.5017e-01, PNorm = 73.4126, GNorm = 0.6705, lr_0 = 4.9197e-04
Loss = 1.5014e-01, PNorm = 73.4203, GNorm = 0.5707, lr_0 = 4.9163e-04
Loss = 1.3483e-01, PNorm = 73.4362, GNorm = 0.9069, lr_0 = 4.9129e-04
Loss = 1.3544e-01, PNorm = 73.4525, GNorm = 0.6061, lr_0 = 4.9095e-04
Loss = 1.3912e-01, PNorm = 73.4628, GNorm = 0.6721, lr_0 = 4.9062e-04
Loss = 1.2801e-01, PNorm = 73.4677, GNorm = 0.6660, lr_0 = 4.9028e-04
Loss = 1.3513e-01, PNorm = 73.4779, GNorm = 0.7082, lr_0 = 4.8995e-04
Loss = 1.2894e-01, PNorm = 73.4908, GNorm = 0.5611, lr_0 = 4.8961e-04
Loss = 1.3941e-01, PNorm = 73.5047, GNorm = 0.6621, lr_0 = 4.8928e-04
Loss = 1.4104e-01, PNorm = 73.5203, GNorm = 0.7603, lr_0 = 4.8894e-04
Loss = 1.4769e-01, PNorm = 73.5393, GNorm = 0.6574, lr_0 = 4.8861e-04
Loss = 1.2512e-01, PNorm = 73.5528, GNorm = 1.0029, lr_0 = 4.8827e-04
Loss = 1.3390e-01, PNorm = 73.5700, GNorm = 0.6809, lr_0 = 4.8794e-04
Loss = 1.3047e-01, PNorm = 73.5823, GNorm = 0.7554, lr_0 = 4.8760e-04
Loss = 1.3373e-01, PNorm = 73.5897, GNorm = 0.5473, lr_0 = 4.8727e-04
Loss = 1.3407e-01, PNorm = 73.5989, GNorm = 0.7019, lr_0 = 4.8693e-04
Loss = 1.2162e-01, PNorm = 73.6077, GNorm = 0.7024, lr_0 = 4.8660e-04
Loss = 1.2757e-01, PNorm = 73.6139, GNorm = 0.6495, lr_0 = 4.8627e-04
Loss = 1.2778e-01, PNorm = 73.6231, GNorm = 0.8188, lr_0 = 4.8593e-04
Loss = 1.3196e-01, PNorm = 73.6354, GNorm = 0.7411, lr_0 = 4.8560e-04
Loss = 1.2987e-01, PNorm = 73.6522, GNorm = 0.6175, lr_0 = 4.8527e-04
Loss = 1.4091e-01, PNorm = 73.6643, GNorm = 0.8183, lr_0 = 4.8494e-04
Loss = 1.3251e-01, PNorm = 73.6809, GNorm = 0.7867, lr_0 = 4.8460e-04
Loss = 1.3103e-01, PNorm = 73.6933, GNorm = 1.1016, lr_0 = 4.8427e-04
Loss = 1.1910e-01, PNorm = 73.7035, GNorm = 0.5974, lr_0 = 4.8394e-04
Loss = 1.3221e-01, PNorm = 73.7120, GNorm = 0.6441, lr_0 = 4.8361e-04
Loss = 1.4262e-01, PNorm = 73.7294, GNorm = 1.0855, lr_0 = 4.8328e-04
Loss = 1.4444e-01, PNorm = 73.7460, GNorm = 1.0248, lr_0 = 4.8295e-04
Loss = 1.2734e-01, PNorm = 73.7550, GNorm = 1.4268, lr_0 = 4.8262e-04
Loss = 1.3157e-01, PNorm = 73.7669, GNorm = 0.8522, lr_0 = 4.8228e-04
Loss = 1.4137e-01, PNorm = 73.7809, GNorm = 0.5044, lr_0 = 4.8195e-04
Loss = 1.3550e-01, PNorm = 73.7953, GNorm = 0.9511, lr_0 = 4.8162e-04
Loss = 1.3335e-01, PNorm = 73.8030, GNorm = 0.8633, lr_0 = 4.8129e-04
Loss = 1.3406e-01, PNorm = 73.8185, GNorm = 1.1133, lr_0 = 4.8096e-04
Loss = 1.3900e-01, PNorm = 73.8347, GNorm = 0.6390, lr_0 = 4.8064e-04
Loss = 1.2801e-01, PNorm = 73.8482, GNorm = 0.5507, lr_0 = 4.8031e-04
Loss = 1.4144e-01, PNorm = 73.8591, GNorm = 0.7159, lr_0 = 4.7998e-04
Loss = 1.3715e-01, PNorm = 73.8718, GNorm = 1.2361, lr_0 = 4.7965e-04
Loss = 1.4330e-01, PNorm = 73.8863, GNorm = 0.7619, lr_0 = 4.7932e-04
Loss = 1.4916e-01, PNorm = 73.9029, GNorm = 0.7502, lr_0 = 4.7899e-04
Loss = 1.3919e-01, PNorm = 73.9241, GNorm = 1.0479, lr_0 = 4.7866e-04
Loss = 1.4262e-01, PNorm = 73.9404, GNorm = 0.7495, lr_0 = 4.7833e-04
Loss = 1.4335e-01, PNorm = 73.9559, GNorm = 0.6910, lr_0 = 4.7801e-04
Loss = 1.3704e-01, PNorm = 73.9696, GNorm = 1.2480, lr_0 = 4.7768e-04
Loss = 1.4197e-01, PNorm = 73.9876, GNorm = 0.6287, lr_0 = 4.7735e-04
Loss = 1.6014e-01, PNorm = 74.0030, GNorm = 0.7578, lr_0 = 4.7703e-04
Validation mae = 0.235331
Epoch 11
Loss = 1.2444e-01, PNorm = 74.0195, GNorm = 0.6698, lr_0 = 4.7670e-04
Loss = 1.4059e-01, PNorm = 74.0332, GNorm = 0.9915, lr_0 = 4.7637e-04
Loss = 1.2043e-01, PNorm = 74.0483, GNorm = 0.7036, lr_0 = 4.7605e-04
Loss = 1.4013e-01, PNorm = 74.0623, GNorm = 1.0339, lr_0 = 4.7572e-04
Loss = 1.2443e-01, PNorm = 74.0719, GNorm = 0.7823, lr_0 = 4.7539e-04
Loss = 1.1923e-01, PNorm = 74.0853, GNorm = 1.0036, lr_0 = 4.7507e-04
Loss = 1.1579e-01, PNorm = 74.0946, GNorm = 0.5975, lr_0 = 4.7474e-04
Loss = 1.2438e-01, PNorm = 74.1093, GNorm = 0.6784, lr_0 = 4.7442e-04
Loss = 1.2585e-01, PNorm = 74.1223, GNorm = 1.0630, lr_0 = 4.7409e-04
Loss = 1.1690e-01, PNorm = 74.1371, GNorm = 0.6137, lr_0 = 4.7377e-04
Loss = 9.1507e-02, PNorm = 74.1516, GNorm = 0.4244, lr_0 = 4.7344e-04
Loss = 1.1924e-01, PNorm = 74.1606, GNorm = 0.4664, lr_0 = 4.7312e-04
Loss = 1.3674e-01, PNorm = 74.1738, GNorm = 0.4236, lr_0 = 4.7279e-04
Loss = 1.4779e-01, PNorm = 74.1881, GNorm = 1.6905, lr_0 = 4.7247e-04
Loss = 1.3364e-01, PNorm = 74.2077, GNorm = 0.6634, lr_0 = 4.7215e-04
Loss = 1.2865e-01, PNorm = 74.2219, GNorm = 0.6157, lr_0 = 4.7182e-04
Loss = 1.3126e-01, PNorm = 74.2386, GNorm = 0.5386, lr_0 = 4.7150e-04
Loss = 1.2623e-01, PNorm = 74.2503, GNorm = 0.6835, lr_0 = 4.7118e-04
Loss = 1.4020e-01, PNorm = 74.2610, GNorm = 0.7633, lr_0 = 4.7085e-04
Loss = 1.3847e-01, PNorm = 74.2699, GNorm = 1.3713, lr_0 = 4.7053e-04
Loss = 1.2983e-01, PNorm = 74.2800, GNorm = 0.6137, lr_0 = 4.7021e-04
Loss = 1.1574e-01, PNorm = 74.2938, GNorm = 0.8886, lr_0 = 4.6989e-04
Loss = 1.1052e-01, PNorm = 74.3076, GNorm = 0.6748, lr_0 = 4.6957e-04
Loss = 1.2022e-01, PNorm = 74.3192, GNorm = 0.6328, lr_0 = 4.6924e-04
Loss = 1.2787e-01, PNorm = 74.3312, GNorm = 0.6176, lr_0 = 4.6892e-04
Loss = 1.1238e-01, PNorm = 74.3457, GNorm = 0.6757, lr_0 = 4.6860e-04
Loss = 1.3313e-01, PNorm = 74.3620, GNorm = 0.7679, lr_0 = 4.6828e-04
Loss = 1.4121e-01, PNorm = 74.3738, GNorm = 0.4772, lr_0 = 4.6796e-04
Loss = 1.2439e-01, PNorm = 74.3790, GNorm = 1.2579, lr_0 = 4.6764e-04
Loss = 1.3859e-01, PNorm = 74.3877, GNorm = 1.2752, lr_0 = 4.6732e-04
Loss = 1.3511e-01, PNorm = 74.4062, GNorm = 1.0350, lr_0 = 4.6700e-04
Loss = 1.4725e-01, PNorm = 74.4211, GNorm = 0.6364, lr_0 = 4.6668e-04
Loss = 1.3371e-01, PNorm = 74.4359, GNorm = 0.6236, lr_0 = 4.6636e-04
Loss = 1.1914e-01, PNorm = 74.4443, GNorm = 0.6088, lr_0 = 4.6604e-04
Loss = 1.2447e-01, PNorm = 74.4546, GNorm = 0.7113, lr_0 = 4.6572e-04
Loss = 1.1723e-01, PNorm = 74.4614, GNorm = 0.6687, lr_0 = 4.6540e-04
Loss = 1.1586e-01, PNorm = 74.4682, GNorm = 0.6992, lr_0 = 4.6508e-04
Loss = 1.2864e-01, PNorm = 74.4756, GNorm = 0.6821, lr_0 = 4.6476e-04
Loss = 1.3935e-01, PNorm = 74.4863, GNorm = 0.6860, lr_0 = 4.6445e-04
Loss = 1.3806e-01, PNorm = 74.5023, GNorm = 1.1678, lr_0 = 4.6413e-04
Loss = 1.1919e-01, PNorm = 74.5179, GNorm = 0.5962, lr_0 = 4.6381e-04
Loss = 1.2895e-01, PNorm = 74.5338, GNorm = 0.9318, lr_0 = 4.6349e-04
Loss = 1.2799e-01, PNorm = 74.5514, GNorm = 0.7833, lr_0 = 4.6317e-04
Loss = 1.4013e-01, PNorm = 74.5695, GNorm = 1.1374, lr_0 = 4.6286e-04
Loss = 1.2645e-01, PNorm = 74.5875, GNorm = 0.6282, lr_0 = 4.6254e-04
Loss = 1.2053e-01, PNorm = 74.6049, GNorm = 0.7751, lr_0 = 4.6222e-04
Loss = 1.1605e-01, PNorm = 74.6175, GNorm = 0.5417, lr_0 = 4.6191e-04
Loss = 1.0573e-01, PNorm = 74.6274, GNorm = 0.4772, lr_0 = 4.6159e-04
Loss = 1.2795e-01, PNorm = 74.6359, GNorm = 0.4843, lr_0 = 4.6127e-04
Loss = 1.4501e-01, PNorm = 74.6507, GNorm = 0.8467, lr_0 = 4.6096e-04
Loss = 1.3361e-01, PNorm = 74.6630, GNorm = 0.6537, lr_0 = 4.6064e-04
Loss = 1.2130e-01, PNorm = 74.6755, GNorm = 0.6131, lr_0 = 4.6033e-04
Loss = 1.2771e-01, PNorm = 74.6883, GNorm = 0.8180, lr_0 = 4.6001e-04
Loss = 1.3673e-01, PNorm = 74.7015, GNorm = 0.8132, lr_0 = 4.5970e-04
Loss = 1.2507e-01, PNorm = 74.7138, GNorm = 0.5395, lr_0 = 4.5938e-04
Loss = 1.3463e-01, PNorm = 74.7259, GNorm = 0.6879, lr_0 = 4.5907e-04
Loss = 1.1340e-01, PNorm = 74.7428, GNorm = 0.9979, lr_0 = 4.5875e-04
Loss = 1.3334e-01, PNorm = 74.7550, GNorm = 0.6925, lr_0 = 4.5844e-04
Loss = 1.2871e-01, PNorm = 74.7635, GNorm = 0.6457, lr_0 = 4.5812e-04
Loss = 1.4419e-01, PNorm = 74.7757, GNorm = 0.6224, lr_0 = 4.5781e-04
Loss = 1.1887e-01, PNorm = 74.7897, GNorm = 0.7319, lr_0 = 4.5750e-04
Loss = 1.3000e-01, PNorm = 74.8019, GNorm = 0.6596, lr_0 = 4.5718e-04
Loss = 1.2450e-01, PNorm = 74.8183, GNorm = 0.9290, lr_0 = 4.5687e-04
Loss = 1.2908e-01, PNorm = 74.8300, GNorm = 0.6317, lr_0 = 4.5656e-04
Loss = 1.2367e-01, PNorm = 74.8445, GNorm = 0.8728, lr_0 = 4.5624e-04
Loss = 1.2817e-01, PNorm = 74.8589, GNorm = 0.7258, lr_0 = 4.5593e-04
Loss = 1.1197e-01, PNorm = 74.8666, GNorm = 0.5608, lr_0 = 4.5562e-04
Loss = 1.2894e-01, PNorm = 74.8766, GNorm = 0.6625, lr_0 = 4.5531e-04
Loss = 1.4125e-01, PNorm = 74.8899, GNorm = 0.9105, lr_0 = 4.5499e-04
Loss = 1.1701e-01, PNorm = 74.9052, GNorm = 0.5335, lr_0 = 4.5468e-04
Loss = 1.1128e-01, PNorm = 74.9209, GNorm = 0.8609, lr_0 = 4.5437e-04
Loss = 1.2546e-01, PNorm = 74.9336, GNorm = 0.6473, lr_0 = 4.5406e-04
Loss = 1.3369e-01, PNorm = 74.9454, GNorm = 0.7709, lr_0 = 4.5375e-04
Loss = 1.2465e-01, PNorm = 74.9551, GNorm = 0.8727, lr_0 = 4.5344e-04
Loss = 1.2347e-01, PNorm = 74.9653, GNorm = 0.5959, lr_0 = 4.5313e-04
Loss = 1.4539e-01, PNorm = 74.9729, GNorm = 0.7429, lr_0 = 4.5282e-04
Loss = 1.2062e-01, PNorm = 74.9831, GNorm = 0.8811, lr_0 = 4.5251e-04
Loss = 1.3739e-01, PNorm = 74.9969, GNorm = 0.7947, lr_0 = 4.5220e-04
Loss = 1.1699e-01, PNorm = 75.0058, GNorm = 0.4450, lr_0 = 4.5189e-04
Loss = 1.3562e-01, PNorm = 75.0154, GNorm = 0.7564, lr_0 = 4.5158e-04
Loss = 1.2846e-01, PNorm = 75.0263, GNorm = 1.0052, lr_0 = 4.5127e-04
Loss = 1.3356e-01, PNorm = 75.0424, GNorm = 0.6045, lr_0 = 4.5096e-04
Loss = 1.3823e-01, PNorm = 75.0586, GNorm = 0.8514, lr_0 = 4.5065e-04
Loss = 1.3304e-01, PNorm = 75.0753, GNorm = 1.1823, lr_0 = 4.5034e-04
Loss = 1.3941e-01, PNorm = 75.0919, GNorm = 1.1960, lr_0 = 4.5003e-04
Loss = 1.2078e-01, PNorm = 75.1043, GNorm = 0.6122, lr_0 = 4.4972e-04
Loss = 1.2324e-01, PNorm = 75.1204, GNorm = 0.7170, lr_0 = 4.4942e-04
Loss = 1.3103e-01, PNorm = 75.1294, GNorm = 1.1079, lr_0 = 4.4911e-04
Loss = 1.4715e-01, PNorm = 75.1433, GNorm = 0.7719, lr_0 = 4.4880e-04
Loss = 1.3202e-01, PNorm = 75.1588, GNorm = 0.5515, lr_0 = 4.4849e-04
Loss = 1.0508e-01, PNorm = 75.1730, GNorm = 0.4900, lr_0 = 4.4819e-04
Loss = 1.1726e-01, PNorm = 75.1853, GNorm = 0.6465, lr_0 = 4.4788e-04
Loss = 1.5131e-01, PNorm = 75.1951, GNorm = 0.5911, lr_0 = 4.4757e-04
Loss = 1.3761e-01, PNorm = 75.2052, GNorm = 0.9393, lr_0 = 4.4727e-04
Loss = 1.6000e-01, PNorm = 75.2224, GNorm = 0.6467, lr_0 = 4.4696e-04
Loss = 1.2837e-01, PNorm = 75.2392, GNorm = 0.7067, lr_0 = 4.4665e-04
Loss = 1.2239e-01, PNorm = 75.2509, GNorm = 1.0222, lr_0 = 4.4635e-04
Loss = 1.1906e-01, PNorm = 75.2601, GNorm = 0.9223, lr_0 = 4.4604e-04
Loss = 1.3362e-01, PNorm = 75.2716, GNorm = 0.6743, lr_0 = 4.4574e-04
Loss = 1.1599e-01, PNorm = 75.2824, GNorm = 0.6726, lr_0 = 4.4543e-04
Loss = 1.3551e-01, PNorm = 75.2916, GNorm = 0.7696, lr_0 = 4.4513e-04
Loss = 1.3290e-01, PNorm = 75.2989, GNorm = 0.7715, lr_0 = 4.4482e-04
Loss = 1.3141e-01, PNorm = 75.3107, GNorm = 0.6972, lr_0 = 4.4452e-04
Loss = 1.1784e-01, PNorm = 75.3285, GNorm = 0.4127, lr_0 = 4.4421e-04
Loss = 1.2389e-01, PNorm = 75.3373, GNorm = 0.6995, lr_0 = 4.4391e-04
Loss = 1.2158e-01, PNorm = 75.3475, GNorm = 0.6768, lr_0 = 4.4360e-04
Loss = 1.2471e-01, PNorm = 75.3572, GNorm = 0.8613, lr_0 = 4.4330e-04
Loss = 1.3455e-01, PNorm = 75.3654, GNorm = 0.6848, lr_0 = 4.4299e-04
Loss = 1.1608e-01, PNorm = 75.3769, GNorm = 0.7853, lr_0 = 4.4269e-04
Loss = 1.3699e-01, PNorm = 75.3902, GNorm = 0.9595, lr_0 = 4.4239e-04
Loss = 1.1123e-01, PNorm = 75.4035, GNorm = 0.9806, lr_0 = 4.4209e-04
Loss = 1.1956e-01, PNorm = 75.4134, GNorm = 0.8469, lr_0 = 4.4178e-04
Loss = 1.4195e-01, PNorm = 75.4189, GNorm = 0.9313, lr_0 = 4.4148e-04
Loss = 1.1563e-01, PNorm = 75.4250, GNorm = 0.7215, lr_0 = 4.4118e-04
Loss = 1.3644e-01, PNorm = 75.4309, GNorm = 0.7366, lr_0 = 4.4088e-04
Loss = 1.3900e-01, PNorm = 75.4377, GNorm = 1.0096, lr_0 = 4.4057e-04
Loss = 1.3110e-01, PNorm = 75.4507, GNorm = 0.5574, lr_0 = 4.4027e-04
Loss = 1.2279e-01, PNorm = 75.4672, GNorm = 0.5146, lr_0 = 4.3997e-04
Loss = 1.2871e-01, PNorm = 75.4823, GNorm = 0.5520, lr_0 = 4.3967e-04
Loss = 1.2866e-01, PNorm = 75.4899, GNorm = 0.7206, lr_0 = 4.3937e-04
Validation mae = 0.234781
Epoch 12
Loss = 1.2204e-01, PNorm = 75.5001, GNorm = 0.7261, lr_0 = 4.3907e-04
Loss = 1.2871e-01, PNorm = 75.5127, GNorm = 1.1241, lr_0 = 4.3877e-04
Loss = 1.0421e-01, PNorm = 75.5271, GNorm = 0.6466, lr_0 = 4.3846e-04
Loss = 1.2510e-01, PNorm = 75.5391, GNorm = 0.8772, lr_0 = 4.3816e-04
Loss = 1.1126e-01, PNorm = 75.5494, GNorm = 0.5005, lr_0 = 4.3786e-04
Loss = 1.1732e-01, PNorm = 75.5600, GNorm = 0.7076, lr_0 = 4.3756e-04
Loss = 1.1021e-01, PNorm = 75.5666, GNorm = 0.5982, lr_0 = 4.3726e-04
Loss = 1.1424e-01, PNorm = 75.5732, GNorm = 0.8082, lr_0 = 4.3696e-04
Loss = 1.1484e-01, PNorm = 75.5843, GNorm = 0.5901, lr_0 = 4.3667e-04
Loss = 1.1782e-01, PNorm = 75.5981, GNorm = 0.5603, lr_0 = 4.3637e-04
Loss = 1.0734e-01, PNorm = 75.6061, GNorm = 0.6109, lr_0 = 4.3607e-04
Loss = 1.1661e-01, PNorm = 75.6193, GNorm = 0.5721, lr_0 = 4.3577e-04
Loss = 1.2288e-01, PNorm = 75.6276, GNorm = 0.6379, lr_0 = 4.3547e-04
Loss = 1.4498e-01, PNorm = 75.6404, GNorm = 0.7441, lr_0 = 4.3517e-04
Loss = 1.1912e-01, PNorm = 75.6556, GNorm = 0.6859, lr_0 = 4.3487e-04
Loss = 1.2037e-01, PNorm = 75.6697, GNorm = 0.8069, lr_0 = 4.3458e-04
Loss = 1.5358e-01, PNorm = 75.6824, GNorm = 1.0566, lr_0 = 4.3428e-04
Loss = 1.1599e-01, PNorm = 75.6956, GNorm = 0.6631, lr_0 = 4.3398e-04
Loss = 1.3236e-01, PNorm = 75.7109, GNorm = 0.8965, lr_0 = 4.3368e-04
Loss = 1.3434e-01, PNorm = 75.7268, GNorm = 0.7521, lr_0 = 4.3339e-04
Loss = 1.1522e-01, PNorm = 75.7417, GNorm = 0.5385, lr_0 = 4.3309e-04
Loss = 1.1921e-01, PNorm = 75.7487, GNorm = 0.5212, lr_0 = 4.3279e-04
Loss = 1.3095e-01, PNorm = 75.7577, GNorm = 0.7915, lr_0 = 4.3250e-04
Loss = 1.1734e-01, PNorm = 75.7668, GNorm = 0.8910, lr_0 = 4.3220e-04
Loss = 1.2533e-01, PNorm = 75.7806, GNorm = 0.7143, lr_0 = 4.3190e-04
Loss = 1.4947e-01, PNorm = 75.7912, GNorm = 0.7262, lr_0 = 4.3161e-04
Loss = 1.1337e-01, PNorm = 75.8056, GNorm = 1.0427, lr_0 = 4.3131e-04
Loss = 1.3018e-01, PNorm = 75.8184, GNorm = 0.6628, lr_0 = 4.3102e-04
Loss = 1.2131e-01, PNorm = 75.8290, GNorm = 0.5861, lr_0 = 4.3072e-04
Loss = 1.0979e-01, PNorm = 75.8438, GNorm = 0.5798, lr_0 = 4.3043e-04
Loss = 1.3028e-01, PNorm = 75.8587, GNorm = 0.6689, lr_0 = 4.3013e-04
Loss = 1.2236e-01, PNorm = 75.8646, GNorm = 0.5350, lr_0 = 4.2984e-04
Loss = 1.0434e-01, PNorm = 75.8725, GNorm = 0.6035, lr_0 = 4.2954e-04
Loss = 1.2892e-01, PNorm = 75.8778, GNorm = 1.2074, lr_0 = 4.2925e-04
Loss = 1.1200e-01, PNorm = 75.8871, GNorm = 0.7317, lr_0 = 4.2895e-04
Loss = 1.2527e-01, PNorm = 75.9050, GNorm = 0.6346, lr_0 = 4.2866e-04
Loss = 1.3719e-01, PNorm = 75.9174, GNorm = 0.7996, lr_0 = 4.2837e-04
Loss = 1.1225e-01, PNorm = 75.9293, GNorm = 0.7447, lr_0 = 4.2807e-04
Loss = 1.3479e-01, PNorm = 75.9365, GNorm = 0.7470, lr_0 = 4.2778e-04
Loss = 1.0878e-01, PNorm = 75.9522, GNorm = 0.4089, lr_0 = 4.2749e-04
Loss = 1.0983e-01, PNorm = 75.9680, GNorm = 1.1279, lr_0 = 4.2719e-04
Loss = 1.0274e-01, PNorm = 75.9809, GNorm = 0.7129, lr_0 = 4.2690e-04
Loss = 1.2351e-01, PNorm = 75.9913, GNorm = 0.4841, lr_0 = 4.2661e-04
Loss = 1.1611e-01, PNorm = 76.0003, GNorm = 0.6331, lr_0 = 4.2632e-04
Loss = 1.1792e-01, PNorm = 76.0070, GNorm = 0.6874, lr_0 = 4.2602e-04
Loss = 1.2352e-01, PNorm = 76.0182, GNorm = 0.6975, lr_0 = 4.2573e-04
Loss = 1.1245e-01, PNorm = 76.0296, GNorm = 1.2363, lr_0 = 4.2544e-04
Loss = 1.1412e-01, PNorm = 76.0429, GNorm = 0.8463, lr_0 = 4.2515e-04
Loss = 1.1849e-01, PNorm = 76.0557, GNorm = 1.0836, lr_0 = 4.2486e-04
Loss = 1.1306e-01, PNorm = 76.0669, GNorm = 0.6980, lr_0 = 4.2457e-04
Loss = 1.2262e-01, PNorm = 76.0788, GNorm = 0.8224, lr_0 = 4.2428e-04
Loss = 1.2091e-01, PNorm = 76.0876, GNorm = 0.9506, lr_0 = 4.2399e-04
Loss = 1.1238e-01, PNorm = 76.0940, GNorm = 0.8335, lr_0 = 4.2370e-04
Loss = 1.2016e-01, PNorm = 76.1030, GNorm = 0.8397, lr_0 = 4.2340e-04
Loss = 1.1981e-01, PNorm = 76.1139, GNorm = 0.5307, lr_0 = 4.2311e-04
Loss = 1.1036e-01, PNorm = 76.1238, GNorm = 0.7838, lr_0 = 4.2283e-04
Loss = 1.3019e-01, PNorm = 76.1386, GNorm = 0.7318, lr_0 = 4.2254e-04
Loss = 1.3141e-01, PNorm = 76.1501, GNorm = 0.4812, lr_0 = 4.2225e-04
Loss = 1.3456e-01, PNorm = 76.1624, GNorm = 0.9723, lr_0 = 4.2196e-04
Loss = 1.1223e-01, PNorm = 76.1828, GNorm = 0.6452, lr_0 = 4.2167e-04
Loss = 1.4085e-01, PNorm = 76.2001, GNorm = 0.6903, lr_0 = 4.2138e-04
Loss = 1.1683e-01, PNorm = 76.2085, GNorm = 1.0301, lr_0 = 4.2109e-04
Loss = 1.1064e-01, PNorm = 76.2146, GNorm = 0.7311, lr_0 = 4.2080e-04
Loss = 1.1030e-01, PNorm = 76.2235, GNorm = 0.7900, lr_0 = 4.2051e-04
Loss = 1.2309e-01, PNorm = 76.2342, GNorm = 0.6322, lr_0 = 4.2023e-04
Loss = 1.2704e-01, PNorm = 76.2412, GNorm = 0.5569, lr_0 = 4.1994e-04
Loss = 1.2410e-01, PNorm = 76.2492, GNorm = 0.4915, lr_0 = 4.1965e-04
Loss = 1.1420e-01, PNorm = 76.2604, GNorm = 0.4661, lr_0 = 4.1936e-04
Loss = 1.2697e-01, PNorm = 76.2700, GNorm = 0.6297, lr_0 = 4.1907e-04
Loss = 1.1610e-01, PNorm = 76.2796, GNorm = 0.5930, lr_0 = 4.1879e-04
Loss = 1.2337e-01, PNorm = 76.2915, GNorm = 0.6317, lr_0 = 4.1850e-04
Loss = 1.4239e-01, PNorm = 76.3081, GNorm = 0.6927, lr_0 = 4.1821e-04
Loss = 1.2191e-01, PNorm = 76.3212, GNorm = 0.7309, lr_0 = 4.1793e-04
Loss = 1.2524e-01, PNorm = 76.3357, GNorm = 0.5118, lr_0 = 4.1764e-04
Loss = 1.1237e-01, PNorm = 76.3474, GNorm = 0.7023, lr_0 = 4.1736e-04
Loss = 1.0704e-01, PNorm = 76.3585, GNorm = 0.5060, lr_0 = 4.1707e-04
Loss = 1.3799e-01, PNorm = 76.3714, GNorm = 1.1420, lr_0 = 4.1678e-04
Loss = 1.4090e-01, PNorm = 76.3832, GNorm = 0.8048, lr_0 = 4.1650e-04
Loss = 1.3672e-01, PNorm = 76.3971, GNorm = 0.9407, lr_0 = 4.1621e-04
Loss = 1.1025e-01, PNorm = 76.4119, GNorm = 0.4165, lr_0 = 4.1593e-04
Loss = 1.2931e-01, PNorm = 76.4213, GNorm = 0.8913, lr_0 = 4.1564e-04
Loss = 1.2056e-01, PNorm = 76.4287, GNorm = 0.4744, lr_0 = 4.1536e-04
Loss = 1.2944e-01, PNorm = 76.4373, GNorm = 0.8430, lr_0 = 4.1507e-04
Loss = 1.1635e-01, PNorm = 76.4447, GNorm = 0.9318, lr_0 = 4.1479e-04
Loss = 1.1740e-01, PNorm = 76.4519, GNorm = 0.6822, lr_0 = 4.1450e-04
Loss = 1.2875e-01, PNorm = 76.4641, GNorm = 0.5782, lr_0 = 4.1422e-04
Loss = 1.0621e-01, PNorm = 76.4720, GNorm = 0.7271, lr_0 = 4.1394e-04
Loss = 1.1906e-01, PNorm = 76.4781, GNorm = 0.6485, lr_0 = 4.1365e-04
Loss = 1.3008e-01, PNorm = 76.4860, GNorm = 0.5418, lr_0 = 4.1337e-04
Loss = 1.2709e-01, PNorm = 76.4996, GNorm = 0.6495, lr_0 = 4.1309e-04
Loss = 1.2694e-01, PNorm = 76.5124, GNorm = 0.9520, lr_0 = 4.1280e-04
Loss = 1.1996e-01, PNorm = 76.5240, GNorm = 0.5740, lr_0 = 4.1252e-04
Loss = 1.1906e-01, PNorm = 76.5313, GNorm = 0.6106, lr_0 = 4.1224e-04
Loss = 1.3200e-01, PNorm = 76.5441, GNorm = 0.5811, lr_0 = 4.1196e-04
Loss = 1.3282e-01, PNorm = 76.5544, GNorm = 0.7394, lr_0 = 4.1167e-04
Loss = 1.2632e-01, PNorm = 76.5647, GNorm = 0.5114, lr_0 = 4.1139e-04
Loss = 1.2847e-01, PNorm = 76.5752, GNorm = 0.7339, lr_0 = 4.1111e-04
Loss = 1.1638e-01, PNorm = 76.5837, GNorm = 0.8483, lr_0 = 4.1083e-04
Loss = 1.2052e-01, PNorm = 76.5956, GNorm = 0.4677, lr_0 = 4.1055e-04
Loss = 1.1898e-01, PNorm = 76.6145, GNorm = 0.4960, lr_0 = 4.1027e-04
Loss = 1.2909e-01, PNorm = 76.6269, GNorm = 0.8279, lr_0 = 4.0998e-04
Loss = 1.1634e-01, PNorm = 76.6343, GNorm = 0.4939, lr_0 = 4.0970e-04
Loss = 1.4485e-01, PNorm = 76.6470, GNorm = 1.0224, lr_0 = 4.0942e-04
Loss = 1.4010e-01, PNorm = 76.6567, GNorm = 0.9098, lr_0 = 4.0914e-04
Loss = 1.2936e-01, PNorm = 76.6686, GNorm = 0.9243, lr_0 = 4.0886e-04
Loss = 1.3918e-01, PNorm = 76.6749, GNorm = 0.8174, lr_0 = 4.0858e-04
Loss = 1.2259e-01, PNorm = 76.6842, GNorm = 0.8483, lr_0 = 4.0830e-04
Loss = 1.1238e-01, PNorm = 76.6927, GNorm = 1.0540, lr_0 = 4.0802e-04
Loss = 1.4506e-01, PNorm = 76.7096, GNorm = 0.8076, lr_0 = 4.0774e-04
Loss = 1.3577e-01, PNorm = 76.7196, GNorm = 0.7225, lr_0 = 4.0746e-04
Loss = 1.1973e-01, PNorm = 76.7320, GNorm = 0.7635, lr_0 = 4.0718e-04
Loss = 1.0674e-01, PNorm = 76.7453, GNorm = 0.5333, lr_0 = 4.0691e-04
Loss = 1.1705e-01, PNorm = 76.7562, GNorm = 0.5914, lr_0 = 4.0663e-04
Loss = 1.0937e-01, PNorm = 76.7638, GNorm = 0.6499, lr_0 = 4.0635e-04
Loss = 1.3260e-01, PNorm = 76.7722, GNorm = 1.0184, lr_0 = 4.0607e-04
Loss = 1.1700e-01, PNorm = 76.7821, GNorm = 0.7423, lr_0 = 4.0579e-04
Loss = 1.1946e-01, PNorm = 76.7904, GNorm = 0.8003, lr_0 = 4.0551e-04
Loss = 1.2669e-01, PNorm = 76.7983, GNorm = 0.5780, lr_0 = 4.0524e-04
Loss = 1.3644e-01, PNorm = 76.8109, GNorm = 0.6914, lr_0 = 4.0496e-04
Loss = 1.1774e-01, PNorm = 76.8220, GNorm = 0.5671, lr_0 = 4.0468e-04
Validation mae = 0.234970
Epoch 13
Loss = 1.0638e-01, PNorm = 76.8319, GNorm = 0.6088, lr_0 = 4.0440e-04
Loss = 1.1967e-01, PNorm = 76.8419, GNorm = 0.5484, lr_0 = 4.0413e-04
Loss = 1.0958e-01, PNorm = 76.8540, GNorm = 0.5476, lr_0 = 4.0385e-04
Loss = 1.1832e-01, PNorm = 76.8696, GNorm = 0.6185, lr_0 = 4.0357e-04
Loss = 1.1142e-01, PNorm = 76.8843, GNorm = 0.9818, lr_0 = 4.0330e-04
Loss = 1.0568e-01, PNorm = 76.8953, GNorm = 0.7416, lr_0 = 4.0302e-04
Loss = 1.0630e-01, PNorm = 76.9010, GNorm = 0.4938, lr_0 = 4.0274e-04
Loss = 1.0643e-01, PNorm = 76.9154, GNorm = 0.8421, lr_0 = 4.0247e-04
Loss = 1.1375e-01, PNorm = 76.9228, GNorm = 0.6335, lr_0 = 4.0219e-04
Loss = 1.0207e-01, PNorm = 76.9317, GNorm = 0.6930, lr_0 = 4.0192e-04
Loss = 1.1611e-01, PNorm = 76.9417, GNorm = 0.8929, lr_0 = 4.0164e-04
Loss = 1.1550e-01, PNorm = 76.9520, GNorm = 0.7952, lr_0 = 4.0137e-04
Loss = 1.1154e-01, PNorm = 76.9671, GNorm = 0.6179, lr_0 = 4.0109e-04
Loss = 1.1197e-01, PNorm = 76.9781, GNorm = 0.5195, lr_0 = 4.0082e-04
Loss = 1.2133e-01, PNorm = 76.9933, GNorm = 0.6321, lr_0 = 4.0054e-04
Loss = 1.1725e-01, PNorm = 77.0082, GNorm = 0.7610, lr_0 = 4.0027e-04
Loss = 1.2392e-01, PNorm = 77.0218, GNorm = 0.5102, lr_0 = 3.9999e-04
Loss = 1.1401e-01, PNorm = 77.0328, GNorm = 0.6674, lr_0 = 3.9972e-04
Loss = 1.3324e-01, PNorm = 77.0387, GNorm = 0.6002, lr_0 = 3.9945e-04
Loss = 1.2538e-01, PNorm = 77.0478, GNorm = 0.7320, lr_0 = 3.9917e-04
Loss = 1.1100e-01, PNorm = 77.0605, GNorm = 0.6898, lr_0 = 3.9890e-04
Loss = 1.1862e-01, PNorm = 77.0672, GNorm = 0.7650, lr_0 = 3.9863e-04
Loss = 1.0373e-01, PNorm = 77.0815, GNorm = 0.6908, lr_0 = 3.9835e-04
Loss = 1.2590e-01, PNorm = 77.0932, GNorm = 0.6095, lr_0 = 3.9808e-04
Loss = 1.1040e-01, PNorm = 77.1033, GNorm = 0.5138, lr_0 = 3.9781e-04
Loss = 1.1207e-01, PNorm = 77.1097, GNorm = 0.6135, lr_0 = 3.9753e-04
Loss = 1.0482e-01, PNorm = 77.1194, GNorm = 0.3695, lr_0 = 3.9726e-04
Loss = 1.2742e-01, PNorm = 77.1297, GNorm = 0.6287, lr_0 = 3.9699e-04
Loss = 1.2334e-01, PNorm = 77.1440, GNorm = 0.6768, lr_0 = 3.9672e-04
Loss = 1.1217e-01, PNorm = 77.1553, GNorm = 0.7441, lr_0 = 3.9645e-04
Loss = 1.1932e-01, PNorm = 77.1649, GNorm = 0.6195, lr_0 = 3.9617e-04
Loss = 1.0412e-01, PNorm = 77.1752, GNorm = 0.6735, lr_0 = 3.9590e-04
Loss = 1.2106e-01, PNorm = 77.1823, GNorm = 0.8944, lr_0 = 3.9563e-04
Loss = 1.0945e-01, PNorm = 77.1923, GNorm = 0.7647, lr_0 = 3.9536e-04
Loss = 1.2849e-01, PNorm = 77.1998, GNorm = 1.1229, lr_0 = 3.9509e-04
Loss = 1.2258e-01, PNorm = 77.2116, GNorm = 0.7293, lr_0 = 3.9482e-04
Loss = 1.0970e-01, PNorm = 77.2188, GNorm = 0.7339, lr_0 = 3.9455e-04
Loss = 1.2397e-01, PNorm = 77.2265, GNorm = 0.7116, lr_0 = 3.9428e-04
Loss = 1.0960e-01, PNorm = 77.2332, GNorm = 0.6654, lr_0 = 3.9401e-04
Loss = 1.1620e-01, PNorm = 77.2422, GNorm = 0.7644, lr_0 = 3.9374e-04
Loss = 1.1512e-01, PNorm = 77.2560, GNorm = 0.8099, lr_0 = 3.9347e-04
Loss = 1.3769e-01, PNorm = 77.2609, GNorm = 0.7262, lr_0 = 3.9320e-04
Loss = 1.2778e-01, PNorm = 77.2750, GNorm = 0.5898, lr_0 = 3.9293e-04
Loss = 1.1420e-01, PNorm = 77.2892, GNorm = 0.7429, lr_0 = 3.9266e-04
Loss = 1.2117e-01, PNorm = 77.2931, GNorm = 0.6896, lr_0 = 3.9239e-04
Loss = 1.0650e-01, PNorm = 77.3046, GNorm = 0.5818, lr_0 = 3.9212e-04
Loss = 1.1043e-01, PNorm = 77.3167, GNorm = 0.7588, lr_0 = 3.9185e-04
Loss = 1.1629e-01, PNorm = 77.3231, GNorm = 0.8469, lr_0 = 3.9159e-04
Loss = 1.1905e-01, PNorm = 77.3379, GNorm = 0.7041, lr_0 = 3.9132e-04
Loss = 9.4735e-02, PNorm = 77.3480, GNorm = 0.5202, lr_0 = 3.9105e-04
Loss = 1.2416e-01, PNorm = 77.3579, GNorm = 1.1431, lr_0 = 3.9078e-04
Loss = 1.1015e-01, PNorm = 77.3661, GNorm = 0.6877, lr_0 = 3.9051e-04
Loss = 1.1120e-01, PNorm = 77.3761, GNorm = 0.7538, lr_0 = 3.9025e-04
Loss = 1.2122e-01, PNorm = 77.3856, GNorm = 0.7753, lr_0 = 3.8998e-04
Loss = 1.0792e-01, PNorm = 77.3909, GNorm = 0.6954, lr_0 = 3.8971e-04
Loss = 1.1124e-01, PNorm = 77.3971, GNorm = 0.5590, lr_0 = 3.8945e-04
Loss = 1.0884e-01, PNorm = 77.4125, GNorm = 0.8470, lr_0 = 3.8918e-04
Loss = 1.2426e-01, PNorm = 77.4215, GNorm = 0.8794, lr_0 = 3.8891e-04
Loss = 1.0144e-01, PNorm = 77.4294, GNorm = 0.5775, lr_0 = 3.8865e-04
Loss = 1.1280e-01, PNorm = 77.4371, GNorm = 0.6758, lr_0 = 3.8838e-04
Loss = 1.1242e-01, PNorm = 77.4489, GNorm = 0.8523, lr_0 = 3.8811e-04
Loss = 1.2624e-01, PNorm = 77.4607, GNorm = 0.8777, lr_0 = 3.8785e-04
Loss = 1.2154e-01, PNorm = 77.4747, GNorm = 0.5536, lr_0 = 3.8758e-04
Loss = 1.1590e-01, PNorm = 77.4840, GNorm = 0.4994, lr_0 = 3.8732e-04
Loss = 1.1064e-01, PNorm = 77.4924, GNorm = 0.6545, lr_0 = 3.8705e-04
Loss = 1.1109e-01, PNorm = 77.4994, GNorm = 0.5632, lr_0 = 3.8679e-04
Loss = 1.1339e-01, PNorm = 77.5078, GNorm = 0.6816, lr_0 = 3.8652e-04
Loss = 1.0659e-01, PNorm = 77.5163, GNorm = 0.6229, lr_0 = 3.8626e-04
Loss = 1.1194e-01, PNorm = 77.5287, GNorm = 0.8721, lr_0 = 3.8599e-04
Loss = 1.3070e-01, PNorm = 77.5402, GNorm = 0.5940, lr_0 = 3.8573e-04
Loss = 1.1832e-01, PNorm = 77.5502, GNorm = 0.9688, lr_0 = 3.8546e-04
Loss = 1.2657e-01, PNorm = 77.5557, GNorm = 0.7445, lr_0 = 3.8520e-04
Loss = 1.0880e-01, PNorm = 77.5677, GNorm = 0.6022, lr_0 = 3.8493e-04
Loss = 1.2731e-01, PNorm = 77.5758, GNorm = 0.5129, lr_0 = 3.8467e-04
Loss = 1.1331e-01, PNorm = 77.5841, GNorm = 0.6542, lr_0 = 3.8441e-04
Loss = 1.1045e-01, PNorm = 77.5913, GNorm = 0.7085, lr_0 = 3.8414e-04
Loss = 1.1351e-01, PNorm = 77.5935, GNorm = 0.5065, lr_0 = 3.8388e-04
Loss = 1.2844e-01, PNorm = 77.5999, GNorm = 0.5691, lr_0 = 3.8362e-04
Loss = 1.1713e-01, PNorm = 77.6096, GNorm = 0.7350, lr_0 = 3.8336e-04
Loss = 1.2704e-01, PNorm = 77.6172, GNorm = 1.2360, lr_0 = 3.8309e-04
Loss = 1.1507e-01, PNorm = 77.6257, GNorm = 0.4737, lr_0 = 3.8283e-04
Loss = 1.1637e-01, PNorm = 77.6352, GNorm = 0.8069, lr_0 = 3.8257e-04
Loss = 1.2149e-01, PNorm = 77.6425, GNorm = 0.8653, lr_0 = 3.8231e-04
Loss = 1.3034e-01, PNorm = 77.6529, GNorm = 0.9819, lr_0 = 3.8204e-04
Loss = 1.2331e-01, PNorm = 77.6602, GNorm = 0.6403, lr_0 = 3.8178e-04
Loss = 1.0463e-01, PNorm = 77.6736, GNorm = 0.6413, lr_0 = 3.8152e-04
Loss = 1.1673e-01, PNorm = 77.6868, GNorm = 1.0567, lr_0 = 3.8126e-04
Loss = 1.0336e-01, PNorm = 77.6921, GNorm = 0.6117, lr_0 = 3.8100e-04
Loss = 1.4723e-01, PNorm = 77.7050, GNorm = 0.5411, lr_0 = 3.8074e-04
Loss = 1.0670e-01, PNorm = 77.7140, GNorm = 0.5058, lr_0 = 3.8048e-04
Loss = 1.2355e-01, PNorm = 77.7203, GNorm = 0.5949, lr_0 = 3.8022e-04
Loss = 1.2686e-01, PNorm = 77.7284, GNorm = 0.8825, lr_0 = 3.7995e-04
Loss = 1.3651e-01, PNorm = 77.7380, GNorm = 0.5616, lr_0 = 3.7969e-04
Loss = 1.2563e-01, PNorm = 77.7493, GNorm = 0.7342, lr_0 = 3.7943e-04
Loss = 1.2661e-01, PNorm = 77.7636, GNorm = 0.9958, lr_0 = 3.7917e-04
Loss = 1.2313e-01, PNorm = 77.7721, GNorm = 0.9920, lr_0 = 3.7891e-04
Loss = 1.1445e-01, PNorm = 77.7797, GNorm = 0.7268, lr_0 = 3.7866e-04
Loss = 1.2412e-01, PNorm = 77.7870, GNorm = 1.0444, lr_0 = 3.7840e-04
Loss = 1.2865e-01, PNorm = 77.7980, GNorm = 0.8160, lr_0 = 3.7814e-04
Loss = 1.1237e-01, PNorm = 77.8093, GNorm = 0.6758, lr_0 = 3.7788e-04
Loss = 1.0976e-01, PNorm = 77.8228, GNorm = 0.8650, lr_0 = 3.7762e-04
Loss = 1.1475e-01, PNorm = 77.8327, GNorm = 0.6026, lr_0 = 3.7736e-04
Loss = 1.0992e-01, PNorm = 77.8427, GNorm = 0.5013, lr_0 = 3.7710e-04
Loss = 1.0493e-01, PNorm = 77.8491, GNorm = 0.5173, lr_0 = 3.7684e-04
Loss = 1.2139e-01, PNorm = 77.8552, GNorm = 0.5822, lr_0 = 3.7659e-04
Loss = 1.3147e-01, PNorm = 77.8663, GNorm = 0.7491, lr_0 = 3.7633e-04
Loss = 1.1832e-01, PNorm = 77.8774, GNorm = 0.7912, lr_0 = 3.7607e-04
Loss = 1.1636e-01, PNorm = 77.8848, GNorm = 0.6431, lr_0 = 3.7581e-04
Loss = 1.0903e-01, PNorm = 77.8951, GNorm = 0.5800, lr_0 = 3.7555e-04
Loss = 1.2517e-01, PNorm = 77.9020, GNorm = 0.6821, lr_0 = 3.7530e-04
Loss = 1.1602e-01, PNorm = 77.9048, GNorm = 0.7702, lr_0 = 3.7504e-04
Loss = 1.2000e-01, PNorm = 77.9146, GNorm = 0.8380, lr_0 = 3.7478e-04
Loss = 1.2312e-01, PNorm = 77.9249, GNorm = 0.7733, lr_0 = 3.7453e-04
Loss = 1.2101e-01, PNorm = 77.9370, GNorm = 1.1940, lr_0 = 3.7427e-04
Loss = 1.1736e-01, PNorm = 77.9468, GNorm = 0.6536, lr_0 = 3.7401e-04
Loss = 1.2309e-01, PNorm = 77.9520, GNorm = 0.9074, lr_0 = 3.7376e-04
Loss = 1.1810e-01, PNorm = 77.9670, GNorm = 0.5850, lr_0 = 3.7350e-04
Loss = 1.3246e-01, PNorm = 77.9793, GNorm = 0.5932, lr_0 = 3.7325e-04
Loss = 1.1962e-01, PNorm = 77.9921, GNorm = 0.7694, lr_0 = 3.7299e-04
Loss = 1.1252e-01, PNorm = 77.9973, GNorm = 0.6020, lr_0 = 3.7273e-04
Validation mae = 0.231657
Epoch 14
Loss = 1.0553e-01, PNorm = 78.0060, GNorm = 0.6151, lr_0 = 3.7248e-04
Loss = 1.0720e-01, PNorm = 78.0150, GNorm = 0.7461, lr_0 = 3.7222e-04
Loss = 1.2531e-01, PNorm = 78.0285, GNorm = 0.9627, lr_0 = 3.7197e-04
Loss = 1.0632e-01, PNorm = 78.0390, GNorm = 0.4512, lr_0 = 3.7171e-04
Loss = 1.1155e-01, PNorm = 78.0503, GNorm = 0.5894, lr_0 = 3.7146e-04
Loss = 9.4974e-02, PNorm = 78.0571, GNorm = 0.5393, lr_0 = 3.7120e-04
Loss = 1.2589e-01, PNorm = 78.0665, GNorm = 0.5847, lr_0 = 3.7095e-04
Loss = 1.1363e-01, PNorm = 78.0755, GNorm = 0.6576, lr_0 = 3.7070e-04
Loss = 1.0486e-01, PNorm = 78.0814, GNorm = 0.7381, lr_0 = 3.7044e-04
Loss = 1.0487e-01, PNorm = 78.0932, GNorm = 0.6327, lr_0 = 3.7019e-04
Loss = 1.0905e-01, PNorm = 78.1034, GNorm = 0.8191, lr_0 = 3.6993e-04
Loss = 1.0473e-01, PNorm = 78.1126, GNorm = 0.6793, lr_0 = 3.6968e-04
Loss = 1.0712e-01, PNorm = 78.1223, GNorm = 0.6631, lr_0 = 3.6943e-04
Loss = 1.2782e-01, PNorm = 78.1336, GNorm = 0.8167, lr_0 = 3.6917e-04
Loss = 1.0045e-01, PNorm = 78.1433, GNorm = 0.6464, lr_0 = 3.6892e-04
Loss = 9.6707e-02, PNorm = 78.1454, GNorm = 1.2121, lr_0 = 3.6867e-04
Loss = 1.0835e-01, PNorm = 78.1515, GNorm = 1.2619, lr_0 = 3.6842e-04
Loss = 1.5038e-01, PNorm = 78.1622, GNorm = 0.9183, lr_0 = 3.6816e-04
Loss = 1.1170e-01, PNorm = 78.1725, GNorm = 0.7437, lr_0 = 3.6791e-04
Loss = 1.1352e-01, PNorm = 78.1855, GNorm = 1.0358, lr_0 = 3.6766e-04
Loss = 1.0275e-01, PNorm = 78.1945, GNorm = 0.8837, lr_0 = 3.6741e-04
Loss = 9.5304e-02, PNorm = 78.2026, GNorm = 0.6033, lr_0 = 3.6716e-04
Loss = 1.0918e-01, PNorm = 78.2083, GNorm = 0.5837, lr_0 = 3.6690e-04
Loss = 1.1884e-01, PNorm = 78.2133, GNorm = 0.6789, lr_0 = 3.6665e-04
Loss = 1.0846e-01, PNorm = 78.2162, GNorm = 0.5490, lr_0 = 3.6640e-04
Loss = 9.8020e-02, PNorm = 78.2220, GNorm = 0.5776, lr_0 = 3.6615e-04
Loss = 1.1565e-01, PNorm = 78.2306, GNorm = 0.9285, lr_0 = 3.6590e-04
Loss = 1.1635e-01, PNorm = 78.2396, GNorm = 0.9243, lr_0 = 3.6565e-04
Loss = 1.4043e-01, PNorm = 78.2495, GNorm = 0.6913, lr_0 = 3.6540e-04
Loss = 1.1160e-01, PNorm = 78.2604, GNorm = 0.6706, lr_0 = 3.6515e-04
Loss = 1.1914e-01, PNorm = 78.2753, GNorm = 0.5619, lr_0 = 3.6490e-04
Loss = 1.2411e-01, PNorm = 78.2877, GNorm = 0.7939, lr_0 = 3.6465e-04
Loss = 1.0931e-01, PNorm = 78.2999, GNorm = 0.7572, lr_0 = 3.6440e-04
Loss = 1.1943e-01, PNorm = 78.3067, GNorm = 0.7314, lr_0 = 3.6415e-04
Loss = 1.1509e-01, PNorm = 78.3140, GNorm = 0.9258, lr_0 = 3.6390e-04
Loss = 1.1916e-01, PNorm = 78.3211, GNorm = 0.7039, lr_0 = 3.6365e-04
Loss = 1.0853e-01, PNorm = 78.3310, GNorm = 0.5912, lr_0 = 3.6340e-04
Loss = 1.1019e-01, PNorm = 78.3407, GNorm = 0.8955, lr_0 = 3.6315e-04
Loss = 1.0653e-01, PNorm = 78.3506, GNorm = 0.7442, lr_0 = 3.6290e-04
Loss = 1.1606e-01, PNorm = 78.3632, GNorm = 0.6782, lr_0 = 3.6266e-04
Loss = 1.0949e-01, PNorm = 78.3702, GNorm = 0.6342, lr_0 = 3.6241e-04
Loss = 1.1317e-01, PNorm = 78.3749, GNorm = 1.0250, lr_0 = 3.6216e-04
Loss = 1.1071e-01, PNorm = 78.3813, GNorm = 0.4800, lr_0 = 3.6191e-04
Loss = 1.0446e-01, PNorm = 78.3892, GNorm = 0.6096, lr_0 = 3.6166e-04
Loss = 1.0470e-01, PNorm = 78.3996, GNorm = 0.5278, lr_0 = 3.6141e-04
Loss = 1.0405e-01, PNorm = 78.4077, GNorm = 0.6017, lr_0 = 3.6117e-04
Loss = 1.0990e-01, PNorm = 78.4149, GNorm = 0.9321, lr_0 = 3.6092e-04
Loss = 1.1500e-01, PNorm = 78.4246, GNorm = 0.6784, lr_0 = 3.6067e-04
Loss = 1.0172e-01, PNorm = 78.4364, GNorm = 0.7709, lr_0 = 3.6043e-04
Loss = 1.0357e-01, PNorm = 78.4466, GNorm = 0.5528, lr_0 = 3.6018e-04
Loss = 1.1545e-01, PNorm = 78.4577, GNorm = 0.7989, lr_0 = 3.5993e-04
Loss = 1.1649e-01, PNorm = 78.4630, GNorm = 0.8894, lr_0 = 3.5969e-04
Loss = 1.0583e-01, PNorm = 78.4702, GNorm = 0.5928, lr_0 = 3.5944e-04
Loss = 1.1838e-01, PNorm = 78.4798, GNorm = 0.5496, lr_0 = 3.5919e-04
Loss = 9.7418e-02, PNorm = 78.4883, GNorm = 0.8699, lr_0 = 3.5895e-04
Loss = 1.0765e-01, PNorm = 78.4937, GNorm = 0.5205, lr_0 = 3.5870e-04
Loss = 1.1007e-01, PNorm = 78.5007, GNorm = 0.7396, lr_0 = 3.5845e-04
Loss = 1.0824e-01, PNorm = 78.5081, GNorm = 1.2576, lr_0 = 3.5821e-04
Loss = 1.0784e-01, PNorm = 78.5142, GNorm = 0.6021, lr_0 = 3.5796e-04
Loss = 1.2085e-01, PNorm = 78.5241, GNorm = 1.1092, lr_0 = 3.5772e-04
Loss = 1.0588e-01, PNorm = 78.5357, GNorm = 0.4865, lr_0 = 3.5747e-04
Loss = 1.1458e-01, PNorm = 78.5443, GNorm = 0.7207, lr_0 = 3.5723e-04
Loss = 1.1658e-01, PNorm = 78.5523, GNorm = 0.6437, lr_0 = 3.5698e-04
Loss = 1.0215e-01, PNorm = 78.5651, GNorm = 0.9216, lr_0 = 3.5674e-04
Loss = 1.1161e-01, PNorm = 78.5752, GNorm = 0.6037, lr_0 = 3.5650e-04
Loss = 9.3974e-02, PNorm = 78.5861, GNorm = 0.6771, lr_0 = 3.5625e-04
Loss = 1.1083e-01, PNorm = 78.5874, GNorm = 0.7903, lr_0 = 3.5601e-04
Loss = 1.2434e-01, PNorm = 78.5978, GNorm = 0.7266, lr_0 = 3.5576e-04
Loss = 1.2060e-01, PNorm = 78.6143, GNorm = 0.9693, lr_0 = 3.5552e-04
Loss = 1.2061e-01, PNorm = 78.6250, GNorm = 0.6507, lr_0 = 3.5528e-04
Loss = 1.2803e-01, PNorm = 78.6353, GNorm = 0.5218, lr_0 = 3.5503e-04
Loss = 1.2292e-01, PNorm = 78.6445, GNorm = 0.6575, lr_0 = 3.5479e-04
Loss = 1.1854e-01, PNorm = 78.6550, GNorm = 0.6608, lr_0 = 3.5455e-04
Loss = 1.1590e-01, PNorm = 78.6648, GNorm = 0.6039, lr_0 = 3.5430e-04
Loss = 1.1199e-01, PNorm = 78.6720, GNorm = 0.4737, lr_0 = 3.5406e-04
Loss = 1.1165e-01, PNorm = 78.6805, GNorm = 0.9268, lr_0 = 3.5382e-04
Loss = 1.2395e-01, PNorm = 78.6856, GNorm = 1.3814, lr_0 = 3.5358e-04
Loss = 1.0981e-01, PNorm = 78.6960, GNorm = 0.6220, lr_0 = 3.5333e-04
Loss = 9.3563e-02, PNorm = 78.7065, GNorm = 0.5521, lr_0 = 3.5309e-04
Loss = 1.0822e-01, PNorm = 78.7139, GNorm = 0.5756, lr_0 = 3.5285e-04
Loss = 1.1671e-01, PNorm = 78.7271, GNorm = 0.6301, lr_0 = 3.5261e-04
Loss = 1.1988e-01, PNorm = 78.7347, GNorm = 0.7881, lr_0 = 3.5237e-04
Loss = 1.1163e-01, PNorm = 78.7433, GNorm = 0.6553, lr_0 = 3.5212e-04
Loss = 1.0646e-01, PNorm = 78.7508, GNorm = 0.6240, lr_0 = 3.5188e-04
Loss = 1.1600e-01, PNorm = 78.7538, GNorm = 1.1674, lr_0 = 3.5164e-04
Loss = 1.0636e-01, PNorm = 78.7586, GNorm = 0.7040, lr_0 = 3.5140e-04
Loss = 1.2043e-01, PNorm = 78.7697, GNorm = 0.8254, lr_0 = 3.5116e-04
Loss = 1.2551e-01, PNorm = 78.7828, GNorm = 1.0216, lr_0 = 3.5092e-04
Loss = 1.1059e-01, PNorm = 78.7934, GNorm = 0.6227, lr_0 = 3.5068e-04
Loss = 1.2269e-01, PNorm = 78.8037, GNorm = 0.6869, lr_0 = 3.5044e-04
Loss = 1.2476e-01, PNorm = 78.8094, GNorm = 0.8087, lr_0 = 3.5020e-04
Loss = 1.2216e-01, PNorm = 78.8119, GNorm = 1.3870, lr_0 = 3.4996e-04
Loss = 1.2158e-01, PNorm = 78.8161, GNorm = 0.9643, lr_0 = 3.4972e-04
Loss = 1.0821e-01, PNorm = 78.8257, GNorm = 0.5575, lr_0 = 3.4948e-04
Loss = 1.1840e-01, PNorm = 78.8356, GNorm = 0.5858, lr_0 = 3.4924e-04
Loss = 1.1409e-01, PNorm = 78.8422, GNorm = 0.8300, lr_0 = 3.4900e-04
Loss = 1.1085e-01, PNorm = 78.8454, GNorm = 0.7717, lr_0 = 3.4876e-04
Loss = 1.3139e-01, PNorm = 78.8556, GNorm = 0.6104, lr_0 = 3.4852e-04
Loss = 1.1955e-01, PNorm = 78.8698, GNorm = 0.9851, lr_0 = 3.4828e-04
Loss = 1.0004e-01, PNorm = 78.8781, GNorm = 0.5938, lr_0 = 3.4805e-04
Loss = 1.1022e-01, PNorm = 78.8855, GNorm = 0.7744, lr_0 = 3.4781e-04
Loss = 1.1410e-01, PNorm = 78.8917, GNorm = 0.5630, lr_0 = 3.4757e-04
Loss = 1.1053e-01, PNorm = 78.9023, GNorm = 0.5455, lr_0 = 3.4733e-04
Loss = 1.0908e-01, PNorm = 78.9132, GNorm = 0.7375, lr_0 = 3.4709e-04
Loss = 1.1271e-01, PNorm = 78.9166, GNorm = 0.9252, lr_0 = 3.4686e-04
Loss = 1.0808e-01, PNorm = 78.9252, GNorm = 0.6297, lr_0 = 3.4662e-04
Loss = 1.2379e-01, PNorm = 78.9360, GNorm = 0.8993, lr_0 = 3.4638e-04
Loss = 1.0120e-01, PNorm = 78.9453, GNorm = 0.6676, lr_0 = 3.4614e-04
Loss = 1.0678e-01, PNorm = 78.9539, GNorm = 0.5643, lr_0 = 3.4591e-04
Loss = 1.2153e-01, PNorm = 78.9604, GNorm = 0.6879, lr_0 = 3.4567e-04
Loss = 8.9792e-02, PNorm = 78.9648, GNorm = 0.6832, lr_0 = 3.4543e-04
Loss = 1.1693e-01, PNorm = 78.9704, GNorm = 0.6321, lr_0 = 3.4520e-04
Loss = 1.2307e-01, PNorm = 78.9789, GNorm = 0.8346, lr_0 = 3.4496e-04
Loss = 1.1230e-01, PNorm = 78.9857, GNorm = 0.7266, lr_0 = 3.4472e-04
Loss = 1.1339e-01, PNorm = 78.9893, GNorm = 0.7768, lr_0 = 3.4449e-04
Loss = 1.0331e-01, PNorm = 78.9959, GNorm = 0.7129, lr_0 = 3.4425e-04
Loss = 1.0775e-01, PNorm = 79.0107, GNorm = 0.5468, lr_0 = 3.4402e-04
Loss = 1.0102e-01, PNorm = 79.0190, GNorm = 0.7166, lr_0 = 3.4378e-04
Loss = 1.1058e-01, PNorm = 79.0236, GNorm = 0.4765, lr_0 = 3.4354e-04
Loss = 1.1045e-01, PNorm = 79.0287, GNorm = 0.5603, lr_0 = 3.4331e-04
Validation mae = 0.230481
Epoch 15
Loss = 1.0206e-01, PNorm = 79.0385, GNorm = 0.5327, lr_0 = 3.4307e-04
Loss = 1.1425e-01, PNorm = 79.0475, GNorm = 0.4960, lr_0 = 3.4284e-04
Loss = 1.0092e-01, PNorm = 79.0540, GNorm = 0.5301, lr_0 = 3.4260e-04
Loss = 1.0094e-01, PNorm = 79.0607, GNorm = 0.6777, lr_0 = 3.4237e-04
Loss = 1.0430e-01, PNorm = 79.0662, GNorm = 0.7631, lr_0 = 3.4213e-04
Loss = 1.0187e-01, PNorm = 79.0724, GNorm = 0.5958, lr_0 = 3.4190e-04
Loss = 9.9972e-02, PNorm = 79.0813, GNorm = 0.6366, lr_0 = 3.4167e-04
Loss = 1.0072e-01, PNorm = 79.0894, GNorm = 0.6721, lr_0 = 3.4143e-04
Loss = 1.0885e-01, PNorm = 79.0971, GNorm = 0.5418, lr_0 = 3.4120e-04
Loss = 1.1948e-01, PNorm = 79.1062, GNorm = 0.7216, lr_0 = 3.4096e-04
Loss = 1.2270e-01, PNorm = 79.1181, GNorm = 0.8705, lr_0 = 3.4073e-04
Loss = 1.3423e-01, PNorm = 79.1320, GNorm = 1.0119, lr_0 = 3.4050e-04
Loss = 1.1461e-01, PNorm = 79.1367, GNorm = 0.7671, lr_0 = 3.4026e-04
Loss = 1.0442e-01, PNorm = 79.1483, GNorm = 0.7187, lr_0 = 3.4003e-04
Loss = 1.0524e-01, PNorm = 79.1591, GNorm = 0.5548, lr_0 = 3.3980e-04
Loss = 1.1718e-01, PNorm = 79.1690, GNorm = 0.7501, lr_0 = 3.3956e-04
Loss = 9.8385e-02, PNorm = 79.1784, GNorm = 0.7178, lr_0 = 3.3933e-04
Loss = 9.8561e-02, PNorm = 79.1824, GNorm = 0.6334, lr_0 = 3.3910e-04
Loss = 1.0379e-01, PNorm = 79.1884, GNorm = 0.5698, lr_0 = 3.3887e-04
Loss = 9.8372e-02, PNorm = 79.1928, GNorm = 0.5759, lr_0 = 3.3864e-04
Loss = 1.0761e-01, PNorm = 79.2060, GNorm = 0.6443, lr_0 = 3.3840e-04
Loss = 1.0832e-01, PNorm = 79.2169, GNorm = 1.0308, lr_0 = 3.3817e-04
Loss = 1.0122e-01, PNorm = 79.2224, GNorm = 0.6943, lr_0 = 3.3794e-04
Loss = 1.1025e-01, PNorm = 79.2314, GNorm = 0.7675, lr_0 = 3.3771e-04
Loss = 1.3054e-01, PNorm = 79.2408, GNorm = 0.5841, lr_0 = 3.3748e-04
Loss = 9.7613e-02, PNorm = 79.2478, GNorm = 0.5827, lr_0 = 3.3725e-04
Loss = 1.1749e-01, PNorm = 79.2548, GNorm = 0.7033, lr_0 = 3.3701e-04
Loss = 1.1010e-01, PNorm = 79.2605, GNorm = 0.5064, lr_0 = 3.3678e-04
Loss = 1.0507e-01, PNorm = 79.2681, GNorm = 0.7711, lr_0 = 3.3655e-04
Loss = 9.7031e-02, PNorm = 79.2708, GNorm = 0.7355, lr_0 = 3.3632e-04
Loss = 1.0190e-01, PNorm = 79.2768, GNorm = 0.6807, lr_0 = 3.3609e-04
Loss = 1.0255e-01, PNorm = 79.2882, GNorm = 1.0174, lr_0 = 3.3586e-04
Loss = 9.2317e-02, PNorm = 79.2984, GNorm = 0.5759, lr_0 = 3.3563e-04
Loss = 1.0404e-01, PNorm = 79.3044, GNorm = 0.5795, lr_0 = 3.3540e-04
Loss = 1.2519e-01, PNorm = 79.3115, GNorm = 1.4495, lr_0 = 3.3517e-04
Loss = 1.0829e-01, PNorm = 79.3153, GNorm = 0.5034, lr_0 = 3.3494e-04
Loss = 9.8269e-02, PNorm = 79.3210, GNorm = 0.7122, lr_0 = 3.3471e-04
Loss = 1.1595e-01, PNorm = 79.3282, GNorm = 0.6298, lr_0 = 3.3448e-04
Loss = 9.8824e-02, PNorm = 79.3378, GNorm = 0.6881, lr_0 = 3.3425e-04
Loss = 1.0240e-01, PNorm = 79.3458, GNorm = 0.6817, lr_0 = 3.3403e-04
Loss = 1.2331e-01, PNorm = 79.3554, GNorm = 0.5582, lr_0 = 3.3380e-04
Loss = 1.0266e-01, PNorm = 79.3668, GNorm = 0.6025, lr_0 = 3.3357e-04
Loss = 1.0223e-01, PNorm = 79.3745, GNorm = 0.5773, lr_0 = 3.3334e-04
Loss = 1.1667e-01, PNorm = 79.3814, GNorm = 0.6262, lr_0 = 3.3311e-04
Loss = 1.1062e-01, PNorm = 79.3949, GNorm = 0.7060, lr_0 = 3.3288e-04
Loss = 1.1398e-01, PNorm = 79.4070, GNorm = 0.5980, lr_0 = 3.3265e-04
Loss = 1.1638e-01, PNorm = 79.4175, GNorm = 0.5590, lr_0 = 3.3243e-04
Loss = 1.1229e-01, PNorm = 79.4260, GNorm = 0.6509, lr_0 = 3.3220e-04
Loss = 1.1739e-01, PNorm = 79.4347, GNorm = 1.0304, lr_0 = 3.3197e-04
Loss = 1.0202e-01, PNorm = 79.4414, GNorm = 0.7012, lr_0 = 3.3174e-04
Loss = 1.1068e-01, PNorm = 79.4482, GNorm = 0.9976, lr_0 = 3.3152e-04
Loss = 1.0725e-01, PNorm = 79.4490, GNorm = 0.7918, lr_0 = 3.3129e-04
Loss = 1.1157e-01, PNorm = 79.4580, GNorm = 0.7103, lr_0 = 3.3106e-04
Loss = 1.1910e-01, PNorm = 79.4671, GNorm = 0.7601, lr_0 = 3.3084e-04
Loss = 1.0663e-01, PNorm = 79.4763, GNorm = 0.6794, lr_0 = 3.3061e-04
Loss = 9.8763e-02, PNorm = 79.4817, GNorm = 0.6546, lr_0 = 3.3038e-04
Loss = 1.0901e-01, PNorm = 79.4879, GNorm = 0.7123, lr_0 = 3.3016e-04
Loss = 1.1277e-01, PNorm = 79.4963, GNorm = 1.3922, lr_0 = 3.2993e-04
Loss = 1.0289e-01, PNorm = 79.5025, GNorm = 0.7527, lr_0 = 3.2970e-04
Loss = 1.0824e-01, PNorm = 79.5115, GNorm = 0.9813, lr_0 = 3.2948e-04
Loss = 9.1337e-02, PNorm = 79.5206, GNorm = 0.5760, lr_0 = 3.2925e-04
Loss = 1.0483e-01, PNorm = 79.5259, GNorm = 0.5815, lr_0 = 3.2903e-04
Loss = 1.0560e-01, PNorm = 79.5317, GNorm = 0.6947, lr_0 = 3.2880e-04
Loss = 1.0488e-01, PNorm = 79.5383, GNorm = 0.6211, lr_0 = 3.2858e-04
Loss = 1.1983e-01, PNorm = 79.5453, GNorm = 0.8047, lr_0 = 3.2835e-04
Loss = 1.2327e-01, PNorm = 79.5525, GNorm = 0.5483, lr_0 = 3.2813e-04
Loss = 1.1514e-01, PNorm = 79.5577, GNorm = 0.9344, lr_0 = 3.2790e-04
Loss = 1.1946e-01, PNorm = 79.5675, GNorm = 0.8737, lr_0 = 3.2768e-04
Loss = 1.1821e-01, PNorm = 79.5819, GNorm = 0.6950, lr_0 = 3.2745e-04
Loss = 1.1529e-01, PNorm = 79.5916, GNorm = 0.7951, lr_0 = 3.2723e-04
Loss = 1.0502e-01, PNorm = 79.5972, GNorm = 0.5364, lr_0 = 3.2700e-04
Loss = 1.0151e-01, PNorm = 79.6037, GNorm = 0.6855, lr_0 = 3.2678e-04
Loss = 1.0112e-01, PNorm = 79.6115, GNorm = 0.8624, lr_0 = 3.2656e-04
Loss = 1.0473e-01, PNorm = 79.6175, GNorm = 1.0770, lr_0 = 3.2633e-04
Loss = 1.1279e-01, PNorm = 79.6219, GNorm = 0.6948, lr_0 = 3.2611e-04
Loss = 1.1513e-01, PNorm = 79.6287, GNorm = 1.1164, lr_0 = 3.2589e-04
Loss = 1.0751e-01, PNorm = 79.6374, GNorm = 0.7077, lr_0 = 3.2566e-04
Loss = 1.0328e-01, PNorm = 79.6441, GNorm = 0.7327, lr_0 = 3.2544e-04
Loss = 1.0526e-01, PNorm = 79.6525, GNorm = 0.4333, lr_0 = 3.2522e-04
Loss = 1.0836e-01, PNorm = 79.6588, GNorm = 1.0621, lr_0 = 3.2499e-04
Loss = 1.1720e-01, PNorm = 79.6678, GNorm = 0.8016, lr_0 = 3.2477e-04
Loss = 1.0350e-01, PNorm = 79.6774, GNorm = 0.7889, lr_0 = 3.2455e-04
Loss = 1.0694e-01, PNorm = 79.6842, GNorm = 0.7332, lr_0 = 3.2433e-04
Loss = 1.2362e-01, PNorm = 79.6941, GNorm = 0.5268, lr_0 = 3.2410e-04
Loss = 1.0762e-01, PNorm = 79.7038, GNorm = 1.3451, lr_0 = 3.2388e-04
Loss = 9.5178e-02, PNorm = 79.7106, GNorm = 0.5318, lr_0 = 3.2366e-04
Loss = 1.0193e-01, PNorm = 79.7180, GNorm = 0.5980, lr_0 = 3.2344e-04
Loss = 1.1129e-01, PNorm = 79.7230, GNorm = 0.6895, lr_0 = 3.2322e-04
Loss = 9.8116e-02, PNorm = 79.7297, GNorm = 0.7461, lr_0 = 3.2300e-04
Loss = 1.0520e-01, PNorm = 79.7379, GNorm = 0.5715, lr_0 = 3.2277e-04
Loss = 1.1822e-01, PNorm = 79.7441, GNorm = 0.6640, lr_0 = 3.2255e-04
Loss = 1.0990e-01, PNorm = 79.7520, GNorm = 0.5383, lr_0 = 3.2233e-04
Loss = 1.0533e-01, PNorm = 79.7593, GNorm = 0.6116, lr_0 = 3.2211e-04
Loss = 1.0649e-01, PNorm = 79.7671, GNorm = 0.6202, lr_0 = 3.2189e-04
Loss = 1.1243e-01, PNorm = 79.7677, GNorm = 1.1407, lr_0 = 3.2167e-04
Loss = 1.1014e-01, PNorm = 79.7733, GNorm = 0.8679, lr_0 = 3.2145e-04
Loss = 1.1006e-01, PNorm = 79.7812, GNorm = 1.3319, lr_0 = 3.2123e-04
Loss = 1.1397e-01, PNorm = 79.7882, GNorm = 1.0321, lr_0 = 3.2101e-04
Loss = 1.1438e-01, PNorm = 79.7984, GNorm = 0.8068, lr_0 = 3.2079e-04
Loss = 1.2485e-01, PNorm = 79.8029, GNorm = 0.6675, lr_0 = 3.2057e-04
Loss = 1.1514e-01, PNorm = 79.8095, GNorm = 0.9240, lr_0 = 3.2035e-04
Loss = 1.1650e-01, PNorm = 79.8172, GNorm = 0.8878, lr_0 = 3.2013e-04
Loss = 1.0016e-01, PNorm = 79.8242, GNorm = 0.6057, lr_0 = 3.1991e-04
Loss = 1.0431e-01, PNorm = 79.8264, GNorm = 0.7200, lr_0 = 3.1969e-04
Loss = 9.6367e-02, PNorm = 79.8310, GNorm = 0.4652, lr_0 = 3.1947e-04
Loss = 1.0173e-01, PNorm = 79.8391, GNorm = 0.6570, lr_0 = 3.1925e-04
Loss = 1.1488e-01, PNorm = 79.8479, GNorm = 0.6268, lr_0 = 3.1904e-04
Loss = 1.0571e-01, PNorm = 79.8554, GNorm = 0.7124, lr_0 = 3.1882e-04
Loss = 1.0962e-01, PNorm = 79.8648, GNorm = 0.6784, lr_0 = 3.1860e-04
Loss = 1.0734e-01, PNorm = 79.8729, GNorm = 0.6738, lr_0 = 3.1838e-04
Loss = 1.0125e-01, PNorm = 79.8764, GNorm = 0.5138, lr_0 = 3.1816e-04
Loss = 1.1109e-01, PNorm = 79.8830, GNorm = 0.5949, lr_0 = 3.1794e-04
Loss = 1.1689e-01, PNorm = 79.8902, GNorm = 0.7296, lr_0 = 3.1773e-04
Loss = 1.0237e-01, PNorm = 79.9009, GNorm = 0.5673, lr_0 = 3.1751e-04
Loss = 1.0810e-01, PNorm = 79.9122, GNorm = 0.5854, lr_0 = 3.1729e-04
Loss = 1.0593e-01, PNorm = 79.9228, GNorm = 0.5168, lr_0 = 3.1707e-04
Loss = 1.2006e-01, PNorm = 79.9314, GNorm = 0.6006, lr_0 = 3.1686e-04
Loss = 1.0587e-01, PNorm = 79.9372, GNorm = 0.6227, lr_0 = 3.1664e-04
Loss = 1.0268e-01, PNorm = 79.9411, GNorm = 0.7455, lr_0 = 3.1642e-04
Loss = 1.0386e-01, PNorm = 79.9436, GNorm = 0.4955, lr_0 = 3.1621e-04
Validation mae = 0.230003
Epoch 16
Loss = 9.4110e-02, PNorm = 79.9485, GNorm = 1.0165, lr_0 = 3.1599e-04
Loss = 1.2118e-01, PNorm = 79.9528, GNorm = 0.7859, lr_0 = 3.1577e-04
Loss = 1.1039e-01, PNorm = 79.9577, GNorm = 0.6067, lr_0 = 3.1556e-04
Loss = 9.7872e-02, PNorm = 79.9653, GNorm = 0.5525, lr_0 = 3.1534e-04
Loss = 1.0597e-01, PNorm = 79.9714, GNorm = 0.5442, lr_0 = 3.1512e-04
Loss = 1.0918e-01, PNorm = 79.9790, GNorm = 0.9543, lr_0 = 3.1491e-04
Loss = 1.0197e-01, PNorm = 79.9872, GNorm = 0.6995, lr_0 = 3.1469e-04
Loss = 1.1722e-01, PNorm = 79.9942, GNorm = 0.6581, lr_0 = 3.1448e-04
Loss = 1.0245e-01, PNorm = 80.0029, GNorm = 0.5872, lr_0 = 3.1426e-04
Loss = 9.7457e-02, PNorm = 80.0120, GNorm = 0.7869, lr_0 = 3.1405e-04
Loss = 9.9709e-02, PNorm = 80.0205, GNorm = 1.0671, lr_0 = 3.1383e-04
Loss = 9.1325e-02, PNorm = 80.0288, GNorm = 0.5956, lr_0 = 3.1362e-04
Loss = 9.2327e-02, PNorm = 80.0342, GNorm = 0.5423, lr_0 = 3.1340e-04
Loss = 9.7497e-02, PNorm = 80.0421, GNorm = 0.8507, lr_0 = 3.1319e-04
Loss = 9.9277e-02, PNorm = 80.0443, GNorm = 0.9935, lr_0 = 3.1297e-04
Loss = 1.0434e-01, PNorm = 80.0503, GNorm = 0.6179, lr_0 = 3.1276e-04
Loss = 9.1550e-02, PNorm = 80.0541, GNorm = 0.6270, lr_0 = 3.1254e-04
Loss = 1.1410e-01, PNorm = 80.0608, GNorm = 0.8035, lr_0 = 3.1233e-04
Loss = 1.1250e-01, PNorm = 80.0698, GNorm = 0.8163, lr_0 = 3.1212e-04
Loss = 9.4738e-02, PNorm = 80.0796, GNorm = 0.6431, lr_0 = 3.1190e-04
Loss = 9.0425e-02, PNorm = 80.0854, GNorm = 0.6509, lr_0 = 3.1169e-04
Loss = 1.1569e-01, PNorm = 80.0902, GNorm = 0.5463, lr_0 = 3.1147e-04
Loss = 1.0398e-01, PNorm = 80.0997, GNorm = 0.5756, lr_0 = 3.1126e-04
Loss = 1.1446e-01, PNorm = 80.1078, GNorm = 0.7347, lr_0 = 3.1105e-04
Loss = 1.0315e-01, PNorm = 80.1152, GNorm = 0.5527, lr_0 = 3.1083e-04
Loss = 1.0100e-01, PNorm = 80.1196, GNorm = 0.6529, lr_0 = 3.1062e-04
Loss = 1.2152e-01, PNorm = 80.1193, GNorm = 0.5874, lr_0 = 3.1041e-04
Loss = 9.7309e-02, PNorm = 80.1258, GNorm = 0.5904, lr_0 = 3.1020e-04
Loss = 9.8710e-02, PNorm = 80.1339, GNorm = 0.8547, lr_0 = 3.0998e-04
Loss = 9.5676e-02, PNorm = 80.1414, GNorm = 0.5246, lr_0 = 3.0977e-04
Loss = 9.7010e-02, PNorm = 80.1469, GNorm = 1.0679, lr_0 = 3.0956e-04
Loss = 9.6935e-02, PNorm = 80.1507, GNorm = 0.6565, lr_0 = 3.0935e-04
Loss = 8.9867e-02, PNorm = 80.1546, GNorm = 0.8161, lr_0 = 3.0914e-04
Loss = 1.0245e-01, PNorm = 80.1581, GNorm = 0.8001, lr_0 = 3.0892e-04
Loss = 1.0644e-01, PNorm = 80.1649, GNorm = 0.6715, lr_0 = 3.0871e-04
Loss = 9.8461e-02, PNorm = 80.1731, GNorm = 0.5890, lr_0 = 3.0850e-04
Loss = 1.0368e-01, PNorm = 80.1809, GNorm = 0.5823, lr_0 = 3.0829e-04
Loss = 1.1156e-01, PNorm = 80.1833, GNorm = 0.8437, lr_0 = 3.0808e-04
Loss = 1.0839e-01, PNorm = 80.1890, GNorm = 0.5612, lr_0 = 3.0787e-04
Loss = 9.3434e-02, PNorm = 80.1977, GNorm = 0.6441, lr_0 = 3.0766e-04
Loss = 1.0489e-01, PNorm = 80.2074, GNorm = 0.5888, lr_0 = 3.0745e-04
Loss = 9.9660e-02, PNorm = 80.2138, GNorm = 0.7153, lr_0 = 3.0723e-04
Loss = 1.0757e-01, PNorm = 80.2215, GNorm = 0.8193, lr_0 = 3.0702e-04
Loss = 1.0912e-01, PNorm = 80.2296, GNorm = 1.1537, lr_0 = 3.0681e-04
Loss = 9.8648e-02, PNorm = 80.2338, GNorm = 0.5778, lr_0 = 3.0660e-04
Loss = 1.1029e-01, PNorm = 80.2382, GNorm = 0.6491, lr_0 = 3.0639e-04
Loss = 1.3437e-01, PNorm = 80.2450, GNorm = 0.5937, lr_0 = 3.0618e-04
Loss = 1.0137e-01, PNorm = 80.2562, GNorm = 0.7450, lr_0 = 3.0597e-04
Loss = 1.0486e-01, PNorm = 80.2670, GNorm = 1.1750, lr_0 = 3.0576e-04
Loss = 9.9164e-02, PNorm = 80.2748, GNorm = 0.8422, lr_0 = 3.0555e-04
Loss = 1.1464e-01, PNorm = 80.2845, GNorm = 0.6007, lr_0 = 3.0535e-04
Loss = 1.1428e-01, PNorm = 80.2929, GNorm = 0.5237, lr_0 = 3.0514e-04
Loss = 1.0184e-01, PNorm = 80.2928, GNorm = 0.7473, lr_0 = 3.0493e-04
Loss = 9.9844e-02, PNorm = 80.2984, GNorm = 0.5396, lr_0 = 3.0472e-04
Loss = 1.0139e-01, PNorm = 80.3030, GNorm = 0.7885, lr_0 = 3.0451e-04
Loss = 9.8893e-02, PNorm = 80.3069, GNorm = 0.6353, lr_0 = 3.0430e-04
Loss = 1.0064e-01, PNorm = 80.3150, GNorm = 0.5392, lr_0 = 3.0409e-04
Loss = 1.0390e-01, PNorm = 80.3257, GNorm = 1.0161, lr_0 = 3.0388e-04
Loss = 1.0500e-01, PNorm = 80.3343, GNorm = 1.2311, lr_0 = 3.0368e-04
Loss = 1.1052e-01, PNorm = 80.3455, GNorm = 0.6995, lr_0 = 3.0347e-04
Loss = 9.9623e-02, PNorm = 80.3517, GNorm = 0.5259, lr_0 = 3.0326e-04
Loss = 1.0823e-01, PNorm = 80.3540, GNorm = 0.4806, lr_0 = 3.0305e-04
Loss = 1.0363e-01, PNorm = 80.3596, GNorm = 0.5069, lr_0 = 3.0284e-04
Loss = 1.2314e-01, PNorm = 80.3682, GNorm = 0.6517, lr_0 = 3.0264e-04
Loss = 1.0152e-01, PNorm = 80.3756, GNorm = 0.7148, lr_0 = 3.0243e-04
Loss = 1.0644e-01, PNorm = 80.3814, GNorm = 0.6603, lr_0 = 3.0222e-04
Loss = 9.7970e-02, PNorm = 80.3844, GNorm = 0.5741, lr_0 = 3.0202e-04
Loss = 9.7158e-02, PNorm = 80.3921, GNorm = 0.6295, lr_0 = 3.0181e-04
Loss = 1.0967e-01, PNorm = 80.4017, GNorm = 0.7826, lr_0 = 3.0160e-04
Loss = 9.0796e-02, PNorm = 80.4111, GNorm = 0.6694, lr_0 = 3.0140e-04
Loss = 9.5964e-02, PNorm = 80.4151, GNorm = 0.6313, lr_0 = 3.0119e-04
Loss = 9.2600e-02, PNorm = 80.4218, GNorm = 1.1458, lr_0 = 3.0098e-04
Loss = 1.0184e-01, PNorm = 80.4318, GNorm = 0.5128, lr_0 = 3.0078e-04
Loss = 1.0056e-01, PNorm = 80.4385, GNorm = 0.6737, lr_0 = 3.0057e-04
Loss = 1.0299e-01, PNorm = 80.4404, GNorm = 0.6045, lr_0 = 3.0036e-04
Loss = 1.1358e-01, PNorm = 80.4426, GNorm = 0.6463, lr_0 = 3.0016e-04
Loss = 1.0775e-01, PNorm = 80.4484, GNorm = 0.6499, lr_0 = 2.9995e-04
Loss = 1.0250e-01, PNorm = 80.4530, GNorm = 0.5929, lr_0 = 2.9975e-04
Loss = 8.2365e-02, PNorm = 80.4578, GNorm = 0.4462, lr_0 = 2.9954e-04
Loss = 9.5863e-02, PNorm = 80.4629, GNorm = 0.7036, lr_0 = 2.9934e-04
Loss = 1.0597e-01, PNorm = 80.4668, GNorm = 0.8400, lr_0 = 2.9913e-04
Loss = 1.1159e-01, PNorm = 80.4737, GNorm = 0.5192, lr_0 = 2.9893e-04
Loss = 1.0424e-01, PNorm = 80.4815, GNorm = 0.7125, lr_0 = 2.9872e-04
Loss = 9.7934e-02, PNorm = 80.4869, GNorm = 0.8575, lr_0 = 2.9852e-04
Loss = 1.0169e-01, PNorm = 80.4945, GNorm = 0.6633, lr_0 = 2.9831e-04
Loss = 9.7661e-02, PNorm = 80.4996, GNorm = 0.7932, lr_0 = 2.9811e-04
Loss = 1.1751e-01, PNorm = 80.5081, GNorm = 1.0206, lr_0 = 2.9790e-04
Loss = 1.2234e-01, PNorm = 80.5174, GNorm = 0.8946, lr_0 = 2.9770e-04
Loss = 9.3076e-02, PNorm = 80.5242, GNorm = 0.5493, lr_0 = 2.9750e-04
Loss = 9.9922e-02, PNorm = 80.5317, GNorm = 0.6823, lr_0 = 2.9729e-04
Loss = 9.6164e-02, PNorm = 80.5372, GNorm = 0.6683, lr_0 = 2.9709e-04
Loss = 1.0450e-01, PNorm = 80.5471, GNorm = 0.5401, lr_0 = 2.9689e-04
Loss = 1.0999e-01, PNorm = 80.5557, GNorm = 0.6594, lr_0 = 2.9668e-04
Loss = 8.9935e-02, PNorm = 80.5629, GNorm = 0.5892, lr_0 = 2.9648e-04
Loss = 9.9219e-02, PNorm = 80.5657, GNorm = 0.6707, lr_0 = 2.9628e-04
Loss = 1.0735e-01, PNorm = 80.5703, GNorm = 0.8973, lr_0 = 2.9607e-04
Loss = 9.0701e-02, PNorm = 80.5775, GNorm = 0.6727, lr_0 = 2.9587e-04
Loss = 9.5603e-02, PNorm = 80.5861, GNorm = 0.8906, lr_0 = 2.9567e-04
Loss = 1.1036e-01, PNorm = 80.5913, GNorm = 0.8369, lr_0 = 2.9546e-04
Loss = 1.0309e-01, PNorm = 80.5950, GNorm = 0.4986, lr_0 = 2.9526e-04
Loss = 1.0297e-01, PNorm = 80.6002, GNorm = 0.7878, lr_0 = 2.9506e-04
Loss = 1.0802e-01, PNorm = 80.6048, GNorm = 0.9380, lr_0 = 2.9486e-04
Loss = 9.3098e-02, PNorm = 80.6093, GNorm = 0.5904, lr_0 = 2.9466e-04
Loss = 1.0717e-01, PNorm = 80.6214, GNorm = 0.8695, lr_0 = 2.9445e-04
Loss = 1.0168e-01, PNorm = 80.6292, GNorm = 0.7112, lr_0 = 2.9425e-04
Loss = 1.1192e-01, PNorm = 80.6343, GNorm = 0.8192, lr_0 = 2.9405e-04
Loss = 1.1607e-01, PNorm = 80.6429, GNorm = 0.7828, lr_0 = 2.9385e-04
Loss = 1.2614e-01, PNorm = 80.6496, GNorm = 0.5141, lr_0 = 2.9365e-04
Loss = 1.0324e-01, PNorm = 80.6588, GNorm = 0.6904, lr_0 = 2.9345e-04
Loss = 1.1890e-01, PNorm = 80.6636, GNorm = 0.6182, lr_0 = 2.9325e-04
Loss = 1.0271e-01, PNorm = 80.6669, GNorm = 0.6974, lr_0 = 2.9305e-04
Loss = 1.0541e-01, PNorm = 80.6693, GNorm = 0.7707, lr_0 = 2.9284e-04
Loss = 9.6637e-02, PNorm = 80.6763, GNorm = 0.6236, lr_0 = 2.9264e-04
Loss = 1.1818e-01, PNorm = 80.6814, GNorm = 0.8406, lr_0 = 2.9244e-04
Loss = 1.0597e-01, PNorm = 80.6814, GNorm = 0.9642, lr_0 = 2.9224e-04
Loss = 8.8150e-02, PNorm = 80.6888, GNorm = 0.4770, lr_0 = 2.9204e-04
Loss = 1.2574e-01, PNorm = 80.6970, GNorm = 0.8932, lr_0 = 2.9184e-04
Loss = 1.1609e-01, PNorm = 80.7022, GNorm = 0.6948, lr_0 = 2.9164e-04
Loss = 9.3776e-02, PNorm = 80.7057, GNorm = 0.5487, lr_0 = 2.9144e-04
Loss = 1.0527e-01, PNorm = 80.7124, GNorm = 0.7094, lr_0 = 2.9124e-04
Validation mae = 0.231736
Epoch 17
Loss = 9.5946e-02, PNorm = 80.7168, GNorm = 0.8574, lr_0 = 2.9104e-04
Loss = 1.0507e-01, PNorm = 80.7235, GNorm = 0.8812, lr_0 = 2.9084e-04
Loss = 1.0397e-01, PNorm = 80.7323, GNorm = 0.6922, lr_0 = 2.9065e-04
Loss = 8.5089e-02, PNorm = 80.7430, GNorm = 0.7219, lr_0 = 2.9045e-04
Loss = 1.0079e-01, PNorm = 80.7506, GNorm = 0.6462, lr_0 = 2.9025e-04
Loss = 9.0932e-02, PNorm = 80.7616, GNorm = 0.5351, lr_0 = 2.9005e-04
Loss = 9.8812e-02, PNorm = 80.7702, GNorm = 0.9284, lr_0 = 2.8985e-04
Loss = 9.3194e-02, PNorm = 80.7766, GNorm = 0.6679, lr_0 = 2.8965e-04
Loss = 9.5135e-02, PNorm = 80.7825, GNorm = 0.5121, lr_0 = 2.8945e-04
Loss = 9.1905e-02, PNorm = 80.7893, GNorm = 0.6781, lr_0 = 2.8925e-04
Loss = 9.1492e-02, PNorm = 80.7937, GNorm = 0.8293, lr_0 = 2.8906e-04
Loss = 8.4552e-02, PNorm = 80.8021, GNorm = 0.9404, lr_0 = 2.8886e-04
Loss = 1.0510e-01, PNorm = 80.8109, GNorm = 0.5221, lr_0 = 2.8866e-04
Loss = 9.0546e-02, PNorm = 80.8132, GNorm = 0.9014, lr_0 = 2.8846e-04
Loss = 9.7209e-02, PNorm = 80.8221, GNorm = 1.2259, lr_0 = 2.8826e-04
Loss = 1.2435e-01, PNorm = 80.8317, GNorm = 1.0187, lr_0 = 2.8807e-04
Loss = 1.1511e-01, PNorm = 80.8397, GNorm = 0.6108, lr_0 = 2.8787e-04
Loss = 1.1667e-01, PNorm = 80.8499, GNorm = 0.8170, lr_0 = 2.8767e-04
Loss = 9.2152e-02, PNorm = 80.8567, GNorm = 0.6412, lr_0 = 2.8748e-04
Loss = 8.8199e-02, PNorm = 80.8635, GNorm = 0.5436, lr_0 = 2.8728e-04
Loss = 9.5176e-02, PNorm = 80.8718, GNorm = 0.8491, lr_0 = 2.8708e-04
Loss = 9.4784e-02, PNorm = 80.8771, GNorm = 0.6435, lr_0 = 2.8689e-04
Loss = 9.6788e-02, PNorm = 80.8847, GNorm = 0.5033, lr_0 = 2.8669e-04
Loss = 9.8348e-02, PNorm = 80.8864, GNorm = 0.9293, lr_0 = 2.8649e-04
Loss = 9.9396e-02, PNorm = 80.8879, GNorm = 0.8075, lr_0 = 2.8630e-04
Loss = 9.8025e-02, PNorm = 80.8941, GNorm = 1.3484, lr_0 = 2.8610e-04
Loss = 1.0435e-01, PNorm = 80.8989, GNorm = 0.6352, lr_0 = 2.8590e-04
Loss = 1.0486e-01, PNorm = 80.9074, GNorm = 0.6852, lr_0 = 2.8571e-04
Loss = 9.2151e-02, PNorm = 80.9140, GNorm = 0.5789, lr_0 = 2.8551e-04
Loss = 1.0200e-01, PNorm = 80.9202, GNorm = 0.6587, lr_0 = 2.8532e-04
Loss = 9.0166e-02, PNorm = 80.9251, GNorm = 0.6915, lr_0 = 2.8512e-04
Loss = 1.0034e-01, PNorm = 80.9336, GNorm = 0.6638, lr_0 = 2.8493e-04
Loss = 1.1221e-01, PNorm = 80.9426, GNorm = 0.7438, lr_0 = 2.8473e-04
Loss = 8.9429e-02, PNorm = 80.9500, GNorm = 0.5118, lr_0 = 2.8454e-04
Loss = 1.0218e-01, PNorm = 80.9573, GNorm = 0.6495, lr_0 = 2.8434e-04
Loss = 1.1245e-01, PNorm = 80.9649, GNorm = 0.7053, lr_0 = 2.8415e-04
Loss = 1.0263e-01, PNorm = 80.9719, GNorm = 0.8326, lr_0 = 2.8395e-04
Loss = 8.9945e-02, PNorm = 80.9783, GNorm = 0.5660, lr_0 = 2.8376e-04
Loss = 1.0376e-01, PNorm = 80.9831, GNorm = 0.6255, lr_0 = 2.8356e-04
Loss = 8.7603e-02, PNorm = 80.9898, GNorm = 0.4844, lr_0 = 2.8337e-04
Loss = 8.9799e-02, PNorm = 80.9969, GNorm = 0.4698, lr_0 = 2.8317e-04
Loss = 9.6389e-02, PNorm = 81.0001, GNorm = 0.5812, lr_0 = 2.8298e-04
Loss = 9.9543e-02, PNorm = 81.0075, GNorm = 0.6750, lr_0 = 2.8279e-04
Loss = 1.0522e-01, PNorm = 81.0101, GNorm = 0.7916, lr_0 = 2.8259e-04
Loss = 1.0578e-01, PNorm = 81.0133, GNorm = 0.7351, lr_0 = 2.8240e-04
Loss = 1.0922e-01, PNorm = 81.0196, GNorm = 0.7550, lr_0 = 2.8221e-04
Loss = 9.9477e-02, PNorm = 81.0258, GNorm = 0.6903, lr_0 = 2.8201e-04
Loss = 1.0670e-01, PNorm = 81.0369, GNorm = 0.5571, lr_0 = 2.8182e-04
Loss = 1.1433e-01, PNorm = 81.0464, GNorm = 0.6702, lr_0 = 2.8163e-04
Loss = 1.0232e-01, PNorm = 81.0514, GNorm = 0.6753, lr_0 = 2.8143e-04
Loss = 1.1094e-01, PNorm = 81.0572, GNorm = 0.7610, lr_0 = 2.8124e-04
Loss = 1.1265e-01, PNorm = 81.0629, GNorm = 0.7620, lr_0 = 2.8105e-04
Loss = 1.0446e-01, PNorm = 81.0669, GNorm = 0.7076, lr_0 = 2.8085e-04
Loss = 1.0324e-01, PNorm = 81.0694, GNorm = 0.7000, lr_0 = 2.8066e-04
Loss = 1.1856e-01, PNorm = 81.0737, GNorm = 0.7910, lr_0 = 2.8047e-04
Loss = 8.3750e-02, PNorm = 81.0790, GNorm = 0.8906, lr_0 = 2.8028e-04
Loss = 9.7987e-02, PNorm = 81.0785, GNorm = 0.6074, lr_0 = 2.8009e-04
Loss = 1.0212e-01, PNorm = 81.0834, GNorm = 0.5696, lr_0 = 2.7989e-04
Loss = 1.0016e-01, PNorm = 81.0890, GNorm = 0.5046, lr_0 = 2.7970e-04
Loss = 9.4789e-02, PNorm = 81.0923, GNorm = 0.5640, lr_0 = 2.7951e-04
Loss = 9.9415e-02, PNorm = 81.0989, GNorm = 0.5560, lr_0 = 2.7932e-04
Loss = 8.7974e-02, PNorm = 81.1031, GNorm = 0.9091, lr_0 = 2.7913e-04
Loss = 8.9104e-02, PNorm = 81.1064, GNorm = 0.5550, lr_0 = 2.7894e-04
Loss = 9.3266e-02, PNorm = 81.1121, GNorm = 0.8841, lr_0 = 2.7875e-04
Loss = 1.0253e-01, PNorm = 81.1171, GNorm = 0.6379, lr_0 = 2.7855e-04
Loss = 1.0210e-01, PNorm = 81.1250, GNorm = 0.6763, lr_0 = 2.7836e-04
Loss = 9.4783e-02, PNorm = 81.1322, GNorm = 0.5301, lr_0 = 2.7817e-04
Loss = 9.1759e-02, PNorm = 81.1338, GNorm = 0.5790, lr_0 = 2.7798e-04
Loss = 1.0430e-01, PNorm = 81.1402, GNorm = 0.6008, lr_0 = 2.7779e-04
Loss = 1.0380e-01, PNorm = 81.1503, GNorm = 1.0822, lr_0 = 2.7760e-04
Loss = 1.0226e-01, PNorm = 81.1577, GNorm = 0.8289, lr_0 = 2.7741e-04
Loss = 1.0010e-01, PNorm = 81.1620, GNorm = 0.6831, lr_0 = 2.7722e-04
Loss = 1.0140e-01, PNorm = 81.1691, GNorm = 0.7399, lr_0 = 2.7703e-04
Loss = 1.0426e-01, PNorm = 81.1741, GNorm = 0.6126, lr_0 = 2.7684e-04
Loss = 1.0264e-01, PNorm = 81.1782, GNorm = 0.4744, lr_0 = 2.7665e-04
Loss = 1.0121e-01, PNorm = 81.1825, GNorm = 0.7494, lr_0 = 2.7646e-04
Loss = 1.0466e-01, PNorm = 81.1856, GNorm = 0.9953, lr_0 = 2.7627e-04
Loss = 1.0816e-01, PNorm = 81.1921, GNorm = 0.5459, lr_0 = 2.7608e-04
Loss = 1.0827e-01, PNorm = 81.2023, GNorm = 0.8229, lr_0 = 2.7590e-04
Loss = 1.0278e-01, PNorm = 81.2060, GNorm = 0.6667, lr_0 = 2.7571e-04
Loss = 1.0866e-01, PNorm = 81.2123, GNorm = 0.5532, lr_0 = 2.7552e-04
Loss = 1.0543e-01, PNorm = 81.2220, GNorm = 0.7254, lr_0 = 2.7533e-04
Loss = 1.1168e-01, PNorm = 81.2293, GNorm = 1.0012, lr_0 = 2.7514e-04
Loss = 1.0596e-01, PNorm = 81.2330, GNorm = 0.9666, lr_0 = 2.7495e-04
Loss = 1.1853e-01, PNorm = 81.2403, GNorm = 0.8323, lr_0 = 2.7476e-04
Loss = 1.0257e-01, PNorm = 81.2433, GNorm = 0.5453, lr_0 = 2.7457e-04
Loss = 9.1803e-02, PNorm = 81.2488, GNorm = 0.5373, lr_0 = 2.7439e-04
Loss = 9.7561e-02, PNorm = 81.2572, GNorm = 0.5465, lr_0 = 2.7420e-04
Loss = 1.0765e-01, PNorm = 81.2631, GNorm = 0.5967, lr_0 = 2.7401e-04
Loss = 1.0627e-01, PNorm = 81.2671, GNorm = 0.8167, lr_0 = 2.7382e-04
Loss = 1.0997e-01, PNorm = 81.2733, GNorm = 0.8019, lr_0 = 2.7364e-04
Loss = 8.7216e-02, PNorm = 81.2783, GNorm = 0.4873, lr_0 = 2.7345e-04
Loss = 1.0456e-01, PNorm = 81.2865, GNorm = 0.5686, lr_0 = 2.7326e-04
Loss = 9.8498e-02, PNorm = 81.2917, GNorm = 0.4936, lr_0 = 2.7307e-04
Loss = 1.1379e-01, PNorm = 81.2968, GNorm = 0.8453, lr_0 = 2.7289e-04
Loss = 1.0038e-01, PNorm = 81.3016, GNorm = 0.5815, lr_0 = 2.7270e-04
Loss = 1.0091e-01, PNorm = 81.3068, GNorm = 0.4340, lr_0 = 2.7251e-04
Loss = 9.4399e-02, PNorm = 81.3160, GNorm = 0.5245, lr_0 = 2.7233e-04
Loss = 9.8748e-02, PNorm = 81.3228, GNorm = 0.9769, lr_0 = 2.7214e-04
Loss = 9.7154e-02, PNorm = 81.3279, GNorm = 0.5503, lr_0 = 2.7195e-04
Loss = 1.1468e-01, PNorm = 81.3322, GNorm = 0.6179, lr_0 = 2.7177e-04
Loss = 1.0542e-01, PNorm = 81.3354, GNorm = 0.7667, lr_0 = 2.7158e-04
Loss = 9.6584e-02, PNorm = 81.3402, GNorm = 0.7533, lr_0 = 2.7139e-04
Loss = 1.0866e-01, PNorm = 81.3455, GNorm = 0.4633, lr_0 = 2.7121e-04
Loss = 9.7710e-02, PNorm = 81.3498, GNorm = 0.7254, lr_0 = 2.7102e-04
Loss = 9.6892e-02, PNorm = 81.3543, GNorm = 0.5042, lr_0 = 2.7084e-04
Loss = 9.8567e-02, PNorm = 81.3578, GNorm = 0.5293, lr_0 = 2.7065e-04
Loss = 1.0215e-01, PNorm = 81.3658, GNorm = 0.5752, lr_0 = 2.7047e-04
Loss = 9.3051e-02, PNorm = 81.3708, GNorm = 0.6410, lr_0 = 2.7028e-04
Loss = 1.1093e-01, PNorm = 81.3711, GNorm = 0.8459, lr_0 = 2.7010e-04
Loss = 9.5371e-02, PNorm = 81.3758, GNorm = 0.7516, lr_0 = 2.6991e-04
Loss = 9.0251e-02, PNorm = 81.3802, GNorm = 0.6787, lr_0 = 2.6973e-04
Loss = 8.7645e-02, PNorm = 81.3836, GNorm = 0.5295, lr_0 = 2.6954e-04
Loss = 9.6145e-02, PNorm = 81.3867, GNorm = 0.5053, lr_0 = 2.6936e-04
Loss = 9.2393e-02, PNorm = 81.3919, GNorm = 0.7043, lr_0 = 2.6917e-04
Loss = 1.2261e-01, PNorm = 81.3975, GNorm = 0.6291, lr_0 = 2.6899e-04
Loss = 1.0408e-01, PNorm = 81.4021, GNorm = 0.7290, lr_0 = 2.6880e-04
Loss = 8.7601e-02, PNorm = 81.4071, GNorm = 0.5767, lr_0 = 2.6862e-04
Loss = 9.4923e-02, PNorm = 81.4128, GNorm = 0.7515, lr_0 = 2.6844e-04
Loss = 9.9822e-02, PNorm = 81.4214, GNorm = 0.4811, lr_0 = 2.6825e-04
Validation mae = 0.234406
Epoch 18
Loss = 1.0222e-01, PNorm = 81.4285, GNorm = 0.8052, lr_0 = 2.6807e-04
Loss = 9.5538e-02, PNorm = 81.4397, GNorm = 0.5255, lr_0 = 2.6788e-04
Loss = 9.5274e-02, PNorm = 81.4488, GNorm = 1.0032, lr_0 = 2.6770e-04
Loss = 9.8708e-02, PNorm = 81.4505, GNorm = 0.5555, lr_0 = 2.6752e-04
Loss = 8.6529e-02, PNorm = 81.4564, GNorm = 0.5415, lr_0 = 2.6733e-04
Loss = 7.9422e-02, PNorm = 81.4635, GNorm = 0.5350, lr_0 = 2.6715e-04
Loss = 9.8078e-02, PNorm = 81.4667, GNorm = 0.5718, lr_0 = 2.6697e-04
Loss = 1.0030e-01, PNorm = 81.4735, GNorm = 0.8318, lr_0 = 2.6678e-04
Loss = 9.9227e-02, PNorm = 81.4785, GNorm = 0.8129, lr_0 = 2.6660e-04
Loss = 8.5883e-02, PNorm = 81.4823, GNorm = 0.6957, lr_0 = 2.6642e-04
Loss = 9.1316e-02, PNorm = 81.4875, GNorm = 0.5428, lr_0 = 2.6624e-04
Loss = 9.9897e-02, PNorm = 81.4945, GNorm = 0.7180, lr_0 = 2.6605e-04
Loss = 9.7485e-02, PNorm = 81.5006, GNorm = 0.7233, lr_0 = 2.6587e-04
Loss = 1.0105e-01, PNorm = 81.5092, GNorm = 0.5782, lr_0 = 2.6569e-04
Loss = 9.1918e-02, PNorm = 81.5141, GNorm = 0.6166, lr_0 = 2.6551e-04
Loss = 9.3236e-02, PNorm = 81.5196, GNorm = 0.5205, lr_0 = 2.6533e-04
Loss = 1.0285e-01, PNorm = 81.5231, GNorm = 0.9140, lr_0 = 2.6514e-04
Loss = 9.8451e-02, PNorm = 81.5278, GNorm = 0.5262, lr_0 = 2.6496e-04
Loss = 1.0320e-01, PNorm = 81.5361, GNorm = 0.7156, lr_0 = 2.6478e-04
Loss = 9.3719e-02, PNorm = 81.5406, GNorm = 0.4484, lr_0 = 2.6460e-04
Loss = 1.0340e-01, PNorm = 81.5481, GNorm = 0.7029, lr_0 = 2.6442e-04
Loss = 9.7601e-02, PNorm = 81.5531, GNorm = 0.8508, lr_0 = 2.6424e-04
Loss = 9.7510e-02, PNorm = 81.5588, GNorm = 0.5236, lr_0 = 2.6406e-04
Loss = 9.5173e-02, PNorm = 81.5645, GNorm = 0.5209, lr_0 = 2.6388e-04
Loss = 9.9646e-02, PNorm = 81.5693, GNorm = 0.6527, lr_0 = 2.6369e-04
Loss = 1.1375e-01, PNorm = 81.5773, GNorm = 0.7132, lr_0 = 2.6351e-04
Loss = 8.7858e-02, PNorm = 81.5851, GNorm = 0.5810, lr_0 = 2.6333e-04
Loss = 8.8847e-02, PNorm = 81.5935, GNorm = 0.6676, lr_0 = 2.6315e-04
Loss = 8.3770e-02, PNorm = 81.5982, GNorm = 0.6129, lr_0 = 2.6297e-04
Loss = 1.0394e-01, PNorm = 81.6025, GNorm = 0.5960, lr_0 = 2.6279e-04
Loss = 9.7538e-02, PNorm = 81.6085, GNorm = 0.6585, lr_0 = 2.6261e-04
Loss = 1.1341e-01, PNorm = 81.6169, GNorm = 0.8815, lr_0 = 2.6243e-04
Loss = 9.5804e-02, PNorm = 81.6223, GNorm = 0.5999, lr_0 = 2.6225e-04
Loss = 9.8977e-02, PNorm = 81.6267, GNorm = 0.6482, lr_0 = 2.6207e-04
Loss = 9.8798e-02, PNorm = 81.6346, GNorm = 0.6042, lr_0 = 2.6189e-04
Loss = 9.6585e-02, PNorm = 81.6404, GNorm = 0.9533, lr_0 = 2.6171e-04
Loss = 8.5038e-02, PNorm = 81.6436, GNorm = 0.6474, lr_0 = 2.6153e-04
Loss = 1.0703e-01, PNorm = 81.6478, GNorm = 0.5724, lr_0 = 2.6136e-04
Loss = 8.5998e-02, PNorm = 81.6538, GNorm = 0.5767, lr_0 = 2.6118e-04
Loss = 9.6224e-02, PNorm = 81.6581, GNorm = 0.5494, lr_0 = 2.6100e-04
Loss = 1.1513e-01, PNorm = 81.6646, GNorm = 0.6691, lr_0 = 2.6082e-04
Loss = 9.4795e-02, PNorm = 81.6692, GNorm = 0.6753, lr_0 = 2.6064e-04
Loss = 9.2933e-02, PNorm = 81.6737, GNorm = 0.7056, lr_0 = 2.6046e-04
Loss = 9.8945e-02, PNorm = 81.6780, GNorm = 0.5485, lr_0 = 2.6028e-04
Loss = 1.0167e-01, PNorm = 81.6798, GNorm = 0.5796, lr_0 = 2.6011e-04
Loss = 9.9237e-02, PNorm = 81.6863, GNorm = 0.7338, lr_0 = 2.5993e-04
Loss = 8.8458e-02, PNorm = 81.6898, GNorm = 0.7913, lr_0 = 2.5975e-04
Loss = 1.0731e-01, PNorm = 81.6955, GNorm = 0.9106, lr_0 = 2.5957e-04
Loss = 8.9538e-02, PNorm = 81.7010, GNorm = 0.7203, lr_0 = 2.5939e-04
Loss = 9.5122e-02, PNorm = 81.7071, GNorm = 0.7398, lr_0 = 2.5922e-04
Loss = 9.4330e-02, PNorm = 81.7174, GNorm = 0.7007, lr_0 = 2.5904e-04
Loss = 9.4137e-02, PNorm = 81.7272, GNorm = 0.8680, lr_0 = 2.5886e-04
Loss = 1.0231e-01, PNorm = 81.7296, GNorm = 0.8217, lr_0 = 2.5868e-04
Loss = 9.3548e-02, PNorm = 81.7354, GNorm = 0.6363, lr_0 = 2.5851e-04
Loss = 9.2987e-02, PNorm = 81.7380, GNorm = 0.5082, lr_0 = 2.5833e-04
Loss = 8.4644e-02, PNorm = 81.7411, GNorm = 0.4359, lr_0 = 2.5815e-04
Loss = 1.0065e-01, PNorm = 81.7451, GNorm = 0.9873, lr_0 = 2.5797e-04
Loss = 1.0021e-01, PNorm = 81.7527, GNorm = 0.9231, lr_0 = 2.5780e-04
Loss = 1.1900e-01, PNorm = 81.7572, GNorm = 0.6811, lr_0 = 2.5762e-04
Loss = 9.9396e-02, PNorm = 81.7612, GNorm = 0.4915, lr_0 = 2.5745e-04
Loss = 1.0475e-01, PNorm = 81.7703, GNorm = 0.7046, lr_0 = 2.5727e-04
Loss = 9.9512e-02, PNorm = 81.7758, GNorm = 0.9376, lr_0 = 2.5709e-04
Loss = 9.4284e-02, PNorm = 81.7822, GNorm = 0.6683, lr_0 = 2.5692e-04
Loss = 9.5259e-02, PNorm = 81.7900, GNorm = 0.8870, lr_0 = 2.5674e-04
Loss = 1.0489e-01, PNorm = 81.7968, GNorm = 0.5641, lr_0 = 2.5656e-04
Loss = 1.0011e-01, PNorm = 81.8046, GNorm = 0.6737, lr_0 = 2.5639e-04
Loss = 9.8074e-02, PNorm = 81.8119, GNorm = 0.7433, lr_0 = 2.5621e-04
Loss = 9.7227e-02, PNorm = 81.8174, GNorm = 0.7120, lr_0 = 2.5604e-04
Loss = 9.5519e-02, PNorm = 81.8205, GNorm = 0.6690, lr_0 = 2.5586e-04
Loss = 8.9278e-02, PNorm = 81.8222, GNorm = 0.5921, lr_0 = 2.5569e-04
Loss = 9.3232e-02, PNorm = 81.8269, GNorm = 0.6389, lr_0 = 2.5551e-04
Loss = 9.3922e-02, PNorm = 81.8307, GNorm = 0.6224, lr_0 = 2.5534e-04
Loss = 1.0822e-01, PNorm = 81.8371, GNorm = 0.6476, lr_0 = 2.5516e-04
Loss = 8.1626e-02, PNorm = 81.8450, GNorm = 0.6423, lr_0 = 2.5499e-04
Loss = 9.6479e-02, PNorm = 81.8489, GNorm = 0.5136, lr_0 = 2.5481e-04
Loss = 9.0005e-02, PNorm = 81.8505, GNorm = 0.6197, lr_0 = 2.5464e-04
Loss = 9.7905e-02, PNorm = 81.8547, GNorm = 0.5606, lr_0 = 2.5446e-04
Loss = 9.9640e-02, PNorm = 81.8604, GNorm = 0.6374, lr_0 = 2.5429e-04
Loss = 9.3981e-02, PNorm = 81.8629, GNorm = 0.3845, lr_0 = 2.5411e-04
Loss = 8.8005e-02, PNorm = 81.8686, GNorm = 0.8298, lr_0 = 2.5394e-04
Loss = 1.0405e-01, PNorm = 81.8743, GNorm = 0.9098, lr_0 = 2.5377e-04
Loss = 1.0285e-01, PNorm = 81.8763, GNorm = 0.8438, lr_0 = 2.5359e-04
Loss = 9.9435e-02, PNorm = 81.8790, GNorm = 0.5592, lr_0 = 2.5342e-04
Loss = 9.7994e-02, PNorm = 81.8842, GNorm = 0.7725, lr_0 = 2.5325e-04
Loss = 8.5585e-02, PNorm = 81.8920, GNorm = 0.4045, lr_0 = 2.5307e-04
Loss = 9.1087e-02, PNorm = 81.9019, GNorm = 0.5727, lr_0 = 2.5290e-04
Loss = 9.5333e-02, PNorm = 81.9075, GNorm = 0.7251, lr_0 = 2.5273e-04
Loss = 1.0419e-01, PNorm = 81.9096, GNorm = 0.7821, lr_0 = 2.5255e-04
Loss = 1.0452e-01, PNorm = 81.9101, GNorm = 0.6842, lr_0 = 2.5238e-04
Loss = 1.1178e-01, PNorm = 81.9136, GNorm = 1.0056, lr_0 = 2.5221e-04
Loss = 9.2535e-02, PNorm = 81.9160, GNorm = 0.5126, lr_0 = 2.5203e-04
Loss = 9.9595e-02, PNorm = 81.9200, GNorm = 0.7253, lr_0 = 2.5186e-04
Loss = 1.0395e-01, PNorm = 81.9255, GNorm = 0.7643, lr_0 = 2.5169e-04
Loss = 8.0539e-02, PNorm = 81.9318, GNorm = 0.6095, lr_0 = 2.5152e-04
Loss = 1.0463e-01, PNorm = 81.9349, GNorm = 0.6345, lr_0 = 2.5134e-04
Loss = 1.0295e-01, PNorm = 81.9367, GNorm = 0.6021, lr_0 = 2.5117e-04
Loss = 9.2483e-02, PNorm = 81.9437, GNorm = 1.1202, lr_0 = 2.5100e-04
Loss = 1.0210e-01, PNorm = 81.9510, GNorm = 0.6716, lr_0 = 2.5083e-04
Loss = 9.6327e-02, PNorm = 81.9586, GNorm = 0.5833, lr_0 = 2.5066e-04
Loss = 1.1766e-01, PNorm = 81.9670, GNorm = 0.5861, lr_0 = 2.5048e-04
Loss = 9.9791e-02, PNorm = 81.9745, GNorm = 0.5735, lr_0 = 2.5031e-04
Loss = 8.9051e-02, PNorm = 81.9816, GNorm = 0.6283, lr_0 = 2.5014e-04
Loss = 9.7284e-02, PNorm = 81.9841, GNorm = 0.6851, lr_0 = 2.4997e-04
Loss = 9.5786e-02, PNorm = 81.9853, GNorm = 0.7751, lr_0 = 2.4980e-04
Loss = 9.0170e-02, PNorm = 81.9902, GNorm = 0.4618, lr_0 = 2.4963e-04
Loss = 1.0163e-01, PNorm = 81.9908, GNorm = 0.7276, lr_0 = 2.4946e-04
Loss = 1.0145e-01, PNorm = 81.9948, GNorm = 0.5554, lr_0 = 2.4929e-04
Loss = 1.0107e-01, PNorm = 82.0001, GNorm = 0.6485, lr_0 = 2.4911e-04
Loss = 9.7727e-02, PNorm = 82.0081, GNorm = 0.8383, lr_0 = 2.4894e-04
Loss = 1.0537e-01, PNorm = 82.0112, GNorm = 0.9338, lr_0 = 2.4877e-04
Loss = 9.8869e-02, PNorm = 82.0103, GNorm = 0.8451, lr_0 = 2.4860e-04
Loss = 1.0481e-01, PNorm = 82.0153, GNorm = 0.6294, lr_0 = 2.4843e-04
Loss = 1.0854e-01, PNorm = 82.0179, GNorm = 0.6157, lr_0 = 2.4826e-04
Loss = 9.9570e-02, PNorm = 82.0238, GNorm = 0.4976, lr_0 = 2.4809e-04
Loss = 9.2320e-02, PNorm = 82.0319, GNorm = 0.4683, lr_0 = 2.4792e-04
Loss = 9.5715e-02, PNorm = 82.0367, GNorm = 0.6157, lr_0 = 2.4775e-04
Loss = 9.6005e-02, PNorm = 82.0441, GNorm = 0.7319, lr_0 = 2.4758e-04
Loss = 8.7171e-02, PNorm = 82.0499, GNorm = 0.9826, lr_0 = 2.4741e-04
Loss = 9.8585e-02, PNorm = 82.0540, GNorm = 0.8304, lr_0 = 2.4724e-04
Loss = 1.0753e-01, PNorm = 82.0575, GNorm = 0.5481, lr_0 = 2.4707e-04
Validation mae = 0.227584
Epoch 19
Loss = 9.8271e-02, PNorm = 82.0620, GNorm = 0.7494, lr_0 = 2.4690e-04
Loss = 8.2978e-02, PNorm = 82.0650, GNorm = 0.6572, lr_0 = 2.4674e-04
Loss = 9.2449e-02, PNorm = 82.0703, GNorm = 0.4822, lr_0 = 2.4657e-04
Loss = 9.4670e-02, PNorm = 82.0735, GNorm = 0.5318, lr_0 = 2.4640e-04
Loss = 9.1126e-02, PNorm = 82.0781, GNorm = 0.4048, lr_0 = 2.4623e-04
Loss = 8.4188e-02, PNorm = 82.0821, GNorm = 0.5681, lr_0 = 2.4606e-04
Loss = 8.4433e-02, PNorm = 82.0862, GNorm = 0.7389, lr_0 = 2.4589e-04
Loss = 9.5457e-02, PNorm = 82.0894, GNorm = 0.5987, lr_0 = 2.4572e-04
Loss = 9.1611e-02, PNorm = 82.0975, GNorm = 0.6887, lr_0 = 2.4556e-04
Loss = 9.6057e-02, PNorm = 82.1019, GNorm = 0.5802, lr_0 = 2.4539e-04
Loss = 8.5982e-02, PNorm = 82.1079, GNorm = 0.5656, lr_0 = 2.4522e-04
Loss = 1.0009e-01, PNorm = 82.1105, GNorm = 0.8154, lr_0 = 2.4505e-04
Loss = 9.1249e-02, PNorm = 82.1125, GNorm = 0.7039, lr_0 = 2.4488e-04
Loss = 9.6790e-02, PNorm = 82.1182, GNorm = 0.6029, lr_0 = 2.4472e-04
Loss = 8.7430e-02, PNorm = 82.1220, GNorm = 0.6418, lr_0 = 2.4455e-04
Loss = 8.5669e-02, PNorm = 82.1226, GNorm = 0.5856, lr_0 = 2.4438e-04
Loss = 8.6961e-02, PNorm = 82.1275, GNorm = 0.6801, lr_0 = 2.4421e-04
Loss = 8.6958e-02, PNorm = 82.1330, GNorm = 0.5684, lr_0 = 2.4405e-04
Loss = 1.0289e-01, PNorm = 82.1363, GNorm = 0.6582, lr_0 = 2.4388e-04
Loss = 9.2312e-02, PNorm = 82.1390, GNorm = 0.5828, lr_0 = 2.4371e-04
Loss = 9.6705e-02, PNorm = 82.1450, GNorm = 0.6121, lr_0 = 2.4354e-04
Loss = 8.7421e-02, PNorm = 82.1510, GNorm = 0.8338, lr_0 = 2.4338e-04
Loss = 9.0976e-02, PNorm = 82.1520, GNorm = 0.6937, lr_0 = 2.4321e-04
Loss = 1.0208e-01, PNorm = 82.1548, GNorm = 0.7384, lr_0 = 2.4304e-04
Loss = 9.1326e-02, PNorm = 82.1618, GNorm = 0.4478, lr_0 = 2.4288e-04
Loss = 9.0953e-02, PNorm = 82.1687, GNorm = 0.7093, lr_0 = 2.4271e-04
Loss = 9.5267e-02, PNorm = 82.1733, GNorm = 0.5756, lr_0 = 2.4254e-04
Loss = 8.3509e-02, PNorm = 82.1758, GNorm = 0.6019, lr_0 = 2.4238e-04
Loss = 8.2064e-02, PNorm = 82.1793, GNorm = 0.6645, lr_0 = 2.4221e-04
Loss = 9.0253e-02, PNorm = 82.1853, GNorm = 0.6648, lr_0 = 2.4205e-04
Loss = 9.2438e-02, PNorm = 82.1931, GNorm = 0.6648, lr_0 = 2.4188e-04
Loss = 8.0247e-02, PNorm = 82.1986, GNorm = 0.6862, lr_0 = 2.4171e-04
Loss = 9.7759e-02, PNorm = 82.2038, GNorm = 0.5381, lr_0 = 2.4155e-04
Loss = 8.9583e-02, PNorm = 82.2099, GNorm = 0.4699, lr_0 = 2.4138e-04
Loss = 9.7019e-02, PNorm = 82.2132, GNorm = 0.6318, lr_0 = 2.4122e-04
Loss = 1.0644e-01, PNorm = 82.2200, GNorm = 0.6630, lr_0 = 2.4105e-04
Loss = 9.3700e-02, PNorm = 82.2238, GNorm = 0.5754, lr_0 = 2.4089e-04
Loss = 8.3255e-02, PNorm = 82.2282, GNorm = 0.7059, lr_0 = 2.4072e-04
Loss = 9.6300e-02, PNorm = 82.2314, GNorm = 0.6922, lr_0 = 2.4056e-04
Loss = 1.0016e-01, PNorm = 82.2352, GNorm = 0.8269, lr_0 = 2.4039e-04
Loss = 1.0070e-01, PNorm = 82.2400, GNorm = 0.6405, lr_0 = 2.4023e-04
Loss = 9.7679e-02, PNorm = 82.2427, GNorm = 0.5913, lr_0 = 2.4006e-04
Loss = 9.3704e-02, PNorm = 82.2466, GNorm = 0.6328, lr_0 = 2.3990e-04
Loss = 1.1210e-01, PNorm = 82.2507, GNorm = 0.7158, lr_0 = 2.3974e-04
Loss = 9.8600e-02, PNorm = 82.2553, GNorm = 0.6803, lr_0 = 2.3957e-04
Loss = 8.0451e-02, PNorm = 82.2598, GNorm = 0.7438, lr_0 = 2.3941e-04
Loss = 9.3128e-02, PNorm = 82.2635, GNorm = 0.6561, lr_0 = 2.3924e-04
Loss = 8.8494e-02, PNorm = 82.2641, GNorm = 0.5734, lr_0 = 2.3908e-04
Loss = 9.3940e-02, PNorm = 82.2707, GNorm = 1.1803, lr_0 = 2.3892e-04
Loss = 9.7329e-02, PNorm = 82.2781, GNorm = 0.6263, lr_0 = 2.3875e-04
Loss = 9.8726e-02, PNorm = 82.2843, GNorm = 0.6139, lr_0 = 2.3859e-04
Loss = 9.8299e-02, PNorm = 82.2908, GNorm = 0.6135, lr_0 = 2.3842e-04
Loss = 9.4099e-02, PNorm = 82.2951, GNorm = 0.6596, lr_0 = 2.3826e-04
Loss = 9.3792e-02, PNorm = 82.3006, GNorm = 0.8109, lr_0 = 2.3810e-04
Loss = 9.6445e-02, PNorm = 82.3061, GNorm = 0.6094, lr_0 = 2.3794e-04
Loss = 9.8609e-02, PNorm = 82.3086, GNorm = 0.5381, lr_0 = 2.3777e-04
Loss = 8.4487e-02, PNorm = 82.3152, GNorm = 0.6067, lr_0 = 2.3761e-04
Loss = 1.0785e-01, PNorm = 82.3209, GNorm = 0.5498, lr_0 = 2.3745e-04
Loss = 9.4290e-02, PNorm = 82.3264, GNorm = 0.6604, lr_0 = 2.3728e-04
Loss = 8.4166e-02, PNorm = 82.3312, GNorm = 0.5333, lr_0 = 2.3712e-04
Loss = 8.0242e-02, PNorm = 82.3344, GNorm = 0.6852, lr_0 = 2.3696e-04
Loss = 8.6941e-02, PNorm = 82.3363, GNorm = 0.5766, lr_0 = 2.3680e-04
Loss = 9.7027e-02, PNorm = 82.3405, GNorm = 0.6919, lr_0 = 2.3663e-04
Loss = 9.3452e-02, PNorm = 82.3428, GNorm = 0.4848, lr_0 = 2.3647e-04
Loss = 8.7428e-02, PNorm = 82.3469, GNorm = 0.7333, lr_0 = 2.3631e-04
Loss = 9.7164e-02, PNorm = 82.3482, GNorm = 0.6287, lr_0 = 2.3615e-04
Loss = 9.0509e-02, PNorm = 82.3544, GNorm = 0.5914, lr_0 = 2.3599e-04
Loss = 8.7017e-02, PNorm = 82.3593, GNorm = 0.5349, lr_0 = 2.3582e-04
Loss = 8.5766e-02, PNorm = 82.3626, GNorm = 0.6936, lr_0 = 2.3566e-04
Loss = 8.0058e-02, PNorm = 82.3635, GNorm = 0.8487, lr_0 = 2.3550e-04
Loss = 8.6260e-02, PNorm = 82.3653, GNorm = 0.6313, lr_0 = 2.3534e-04
Loss = 8.0201e-02, PNorm = 82.3675, GNorm = 0.4908, lr_0 = 2.3518e-04
Loss = 8.9830e-02, PNorm = 82.3726, GNorm = 0.4561, lr_0 = 2.3502e-04
Loss = 1.0769e-01, PNorm = 82.3772, GNorm = 0.7206, lr_0 = 2.3486e-04
Loss = 8.3732e-02, PNorm = 82.3827, GNorm = 0.6122, lr_0 = 2.3470e-04
Loss = 9.2039e-02, PNorm = 82.3860, GNorm = 0.6407, lr_0 = 2.3454e-04
Loss = 1.0731e-01, PNorm = 82.3881, GNorm = 0.6775, lr_0 = 2.3437e-04
Loss = 9.2662e-02, PNorm = 82.3915, GNorm = 0.6104, lr_0 = 2.3421e-04
Loss = 1.0542e-01, PNorm = 82.3957, GNorm = 0.8505, lr_0 = 2.3405e-04
Loss = 9.8125e-02, PNorm = 82.4005, GNorm = 0.6047, lr_0 = 2.3389e-04
Loss = 9.9250e-02, PNorm = 82.4057, GNorm = 0.6090, lr_0 = 2.3373e-04
Loss = 9.5241e-02, PNorm = 82.4103, GNorm = 0.5983, lr_0 = 2.3357e-04
Loss = 8.9839e-02, PNorm = 82.4138, GNorm = 0.5817, lr_0 = 2.3341e-04
Loss = 7.3385e-02, PNorm = 82.4198, GNorm = 0.5411, lr_0 = 2.3325e-04
Loss = 8.8381e-02, PNorm = 82.4255, GNorm = 0.8973, lr_0 = 2.3309e-04
Loss = 1.0401e-01, PNorm = 82.4303, GNorm = 0.6485, lr_0 = 2.3293e-04
Loss = 1.0507e-01, PNorm = 82.4358, GNorm = 0.6509, lr_0 = 2.3277e-04
Loss = 9.7522e-02, PNorm = 82.4417, GNorm = 0.5601, lr_0 = 2.3261e-04
Loss = 9.0016e-02, PNorm = 82.4482, GNorm = 0.5313, lr_0 = 2.3246e-04
Loss = 9.4411e-02, PNorm = 82.4546, GNorm = 0.5343, lr_0 = 2.3230e-04
Loss = 8.5418e-02, PNorm = 82.4623, GNorm = 0.5864, lr_0 = 2.3214e-04
Loss = 9.2356e-02, PNorm = 82.4706, GNorm = 0.5828, lr_0 = 2.3198e-04
Loss = 1.0663e-01, PNorm = 82.4729, GNorm = 0.5841, lr_0 = 2.3182e-04
Loss = 9.7927e-02, PNorm = 82.4754, GNorm = 0.7793, lr_0 = 2.3166e-04
Loss = 9.3550e-02, PNorm = 82.4805, GNorm = 0.6495, lr_0 = 2.3150e-04
Loss = 1.1902e-01, PNorm = 82.4867, GNorm = 0.7732, lr_0 = 2.3134e-04
Loss = 9.3805e-02, PNorm = 82.4875, GNorm = 0.6744, lr_0 = 2.3118e-04
Loss = 8.5658e-02, PNorm = 82.4900, GNorm = 0.6440, lr_0 = 2.3103e-04
Loss = 9.1474e-02, PNorm = 82.4912, GNorm = 0.5796, lr_0 = 2.3087e-04
Loss = 1.0014e-01, PNorm = 82.4936, GNorm = 0.9691, lr_0 = 2.3071e-04
Loss = 1.0879e-01, PNorm = 82.4973, GNorm = 0.5159, lr_0 = 2.3055e-04
Loss = 9.1308e-02, PNorm = 82.4993, GNorm = 0.5864, lr_0 = 2.3039e-04
Loss = 9.5659e-02, PNorm = 82.5028, GNorm = 0.6476, lr_0 = 2.3024e-04
Loss = 9.7306e-02, PNorm = 82.5050, GNorm = 0.5752, lr_0 = 2.3008e-04
Loss = 9.8524e-02, PNorm = 82.5092, GNorm = 0.6994, lr_0 = 2.2992e-04
Loss = 9.6591e-02, PNorm = 82.5112, GNorm = 0.6761, lr_0 = 2.2976e-04
Loss = 1.0129e-01, PNorm = 82.5141, GNorm = 0.6556, lr_0 = 2.2961e-04
Loss = 9.0233e-02, PNorm = 82.5205, GNorm = 0.8195, lr_0 = 2.2945e-04
Loss = 8.1957e-02, PNorm = 82.5264, GNorm = 0.4883, lr_0 = 2.2929e-04
Loss = 9.3508e-02, PNorm = 82.5314, GNorm = 0.5828, lr_0 = 2.2913e-04
Loss = 9.9671e-02, PNorm = 82.5348, GNorm = 0.5991, lr_0 = 2.2898e-04
Loss = 8.1056e-02, PNorm = 82.5390, GNorm = 0.4814, lr_0 = 2.2882e-04
Loss = 8.7233e-02, PNorm = 82.5435, GNorm = 0.6223, lr_0 = 2.2866e-04
Loss = 1.0346e-01, PNorm = 82.5478, GNorm = 0.7064, lr_0 = 2.2851e-04
Loss = 9.3350e-02, PNorm = 82.5529, GNorm = 0.6145, lr_0 = 2.2835e-04
Loss = 9.5832e-02, PNorm = 82.5591, GNorm = 0.7843, lr_0 = 2.2819e-04
Loss = 9.6927e-02, PNorm = 82.5602, GNorm = 0.6300, lr_0 = 2.2804e-04
Loss = 1.0327e-01, PNorm = 82.5631, GNorm = 0.5110, lr_0 = 2.2788e-04
Loss = 9.4196e-02, PNorm = 82.5668, GNorm = 0.5327, lr_0 = 2.2773e-04
Loss = 1.0203e-01, PNorm = 82.5712, GNorm = 0.7068, lr_0 = 2.2757e-04
Validation mae = 0.228584
Epoch 20
Loss = 8.3406e-02, PNorm = 82.5764, GNorm = 0.5003, lr_0 = 2.2741e-04
Loss = 9.7578e-02, PNorm = 82.5809, GNorm = 0.5773, lr_0 = 2.2726e-04
Loss = 8.3703e-02, PNorm = 82.5833, GNorm = 0.4961, lr_0 = 2.2710e-04
Loss = 8.1945e-02, PNorm = 82.5883, GNorm = 0.5631, lr_0 = 2.2695e-04
Loss = 8.8858e-02, PNorm = 82.5949, GNorm = 0.6621, lr_0 = 2.2679e-04
Loss = 9.7077e-02, PNorm = 82.5995, GNorm = 0.8748, lr_0 = 2.2664e-04
Loss = 9.6972e-02, PNorm = 82.6041, GNorm = 0.6073, lr_0 = 2.2648e-04
Loss = 7.8047e-02, PNorm = 82.6081, GNorm = 0.5495, lr_0 = 2.2632e-04
Loss = 8.9490e-02, PNorm = 82.6140, GNorm = 0.7585, lr_0 = 2.2617e-04
Loss = 8.1571e-02, PNorm = 82.6189, GNorm = 0.6457, lr_0 = 2.2601e-04
Loss = 9.7910e-02, PNorm = 82.6226, GNorm = 0.6240, lr_0 = 2.2586e-04
Loss = 8.9491e-02, PNorm = 82.6303, GNorm = 0.9358, lr_0 = 2.2571e-04
Loss = 9.3703e-02, PNorm = 82.6337, GNorm = 0.6587, lr_0 = 2.2555e-04
Loss = 8.6637e-02, PNorm = 82.6381, GNorm = 0.6626, lr_0 = 2.2540e-04
Loss = 8.6530e-02, PNorm = 82.6447, GNorm = 0.5526, lr_0 = 2.2524e-04
Loss = 8.9767e-02, PNorm = 82.6495, GNorm = 0.5944, lr_0 = 2.2509e-04
Loss = 9.6013e-02, PNorm = 82.6522, GNorm = 0.8033, lr_0 = 2.2493e-04
Loss = 7.9645e-02, PNorm = 82.6568, GNorm = 0.8562, lr_0 = 2.2478e-04
Loss = 8.6350e-02, PNorm = 82.6625, GNorm = 0.4818, lr_0 = 2.2463e-04
Loss = 8.0312e-02, PNorm = 82.6678, GNorm = 0.6188, lr_0 = 2.2447e-04
Loss = 8.5036e-02, PNorm = 82.6715, GNorm = 0.6237, lr_0 = 2.2432e-04
Loss = 8.7233e-02, PNorm = 82.6748, GNorm = 0.4986, lr_0 = 2.2416e-04
Loss = 8.4913e-02, PNorm = 82.6771, GNorm = 0.5666, lr_0 = 2.2401e-04
Loss = 8.0018e-02, PNorm = 82.6792, GNorm = 0.5165, lr_0 = 2.2386e-04
Loss = 1.0381e-01, PNorm = 82.6853, GNorm = 0.7341, lr_0 = 2.2370e-04
Loss = 8.8296e-02, PNorm = 82.6925, GNorm = 0.6034, lr_0 = 2.2355e-04
Loss = 8.5003e-02, PNorm = 82.6968, GNorm = 0.6484, lr_0 = 2.2340e-04
Loss = 7.5089e-02, PNorm = 82.6997, GNorm = 0.4477, lr_0 = 2.2324e-04
Loss = 1.0273e-01, PNorm = 82.7045, GNorm = 0.5916, lr_0 = 2.2309e-04
Loss = 9.2816e-02, PNorm = 82.7082, GNorm = 0.6667, lr_0 = 2.2294e-04
Loss = 8.7418e-02, PNorm = 82.7132, GNorm = 0.5908, lr_0 = 2.2279e-04
Loss = 9.0647e-02, PNorm = 82.7183, GNorm = 0.7723, lr_0 = 2.2263e-04
Loss = 1.0427e-01, PNorm = 82.7214, GNorm = 0.9129, lr_0 = 2.2248e-04
Loss = 9.2315e-02, PNorm = 82.7255, GNorm = 0.7062, lr_0 = 2.2233e-04
Loss = 7.8926e-02, PNorm = 82.7304, GNorm = 0.4834, lr_0 = 2.2218e-04
Loss = 9.8103e-02, PNorm = 82.7357, GNorm = 0.6875, lr_0 = 2.2202e-04
Loss = 8.1458e-02, PNorm = 82.7402, GNorm = 0.5225, lr_0 = 2.2187e-04
Loss = 8.2425e-02, PNorm = 82.7425, GNorm = 0.5396, lr_0 = 2.2172e-04
Loss = 9.4191e-02, PNorm = 82.7426, GNorm = 0.6825, lr_0 = 2.2157e-04
Loss = 9.4133e-02, PNorm = 82.7444, GNorm = 0.6699, lr_0 = 2.2142e-04
Loss = 8.7634e-02, PNorm = 82.7493, GNorm = 0.6516, lr_0 = 2.2126e-04
Loss = 8.8168e-02, PNorm = 82.7516, GNorm = 0.5738, lr_0 = 2.2111e-04
Loss = 9.7243e-02, PNorm = 82.7569, GNorm = 0.4746, lr_0 = 2.2096e-04
Loss = 9.4495e-02, PNorm = 82.7592, GNorm = 0.7008, lr_0 = 2.2081e-04
Loss = 8.2485e-02, PNorm = 82.7637, GNorm = 0.4985, lr_0 = 2.2066e-04
Loss = 9.5727e-02, PNorm = 82.7688, GNorm = 0.5927, lr_0 = 2.2051e-04
Loss = 9.1752e-02, PNorm = 82.7727, GNorm = 0.6843, lr_0 = 2.2036e-04
Loss = 9.2431e-02, PNorm = 82.7773, GNorm = 0.6040, lr_0 = 2.2021e-04
Loss = 9.1697e-02, PNorm = 82.7827, GNorm = 0.6346, lr_0 = 2.2005e-04
Loss = 8.8670e-02, PNorm = 82.7840, GNorm = 0.7035, lr_0 = 2.1990e-04
Loss = 8.0447e-02, PNorm = 82.7860, GNorm = 0.8074, lr_0 = 2.1975e-04
Loss = 8.4786e-02, PNorm = 82.7871, GNorm = 0.6928, lr_0 = 2.1960e-04
Loss = 8.8304e-02, PNorm = 82.7877, GNorm = 0.7037, lr_0 = 2.1945e-04
Loss = 9.4618e-02, PNorm = 82.7919, GNorm = 0.6334, lr_0 = 2.1930e-04
Loss = 7.5772e-02, PNorm = 82.7979, GNorm = 0.5242, lr_0 = 2.1915e-04
Loss = 1.0377e-01, PNorm = 82.8022, GNorm = 0.7980, lr_0 = 2.1900e-04
Loss = 1.1075e-01, PNorm = 82.8056, GNorm = 0.6507, lr_0 = 2.1885e-04
Loss = 9.4182e-02, PNorm = 82.8098, GNorm = 0.5878, lr_0 = 2.1870e-04
Loss = 9.0890e-02, PNorm = 82.8136, GNorm = 0.4454, lr_0 = 2.1855e-04
Loss = 8.6150e-02, PNorm = 82.8184, GNorm = 0.5302, lr_0 = 2.1840e-04
Loss = 8.1290e-02, PNorm = 82.8229, GNorm = 0.5309, lr_0 = 2.1825e-04
Loss = 8.2996e-02, PNorm = 82.8270, GNorm = 0.5665, lr_0 = 2.1810e-04
Loss = 9.0142e-02, PNorm = 82.8330, GNorm = 0.6402, lr_0 = 2.1795e-04
Loss = 9.3205e-02, PNorm = 82.8377, GNorm = 0.6727, lr_0 = 2.1780e-04
Loss = 9.5670e-02, PNorm = 82.8393, GNorm = 0.6221, lr_0 = 2.1765e-04
Loss = 8.2352e-02, PNorm = 82.8410, GNorm = 0.7155, lr_0 = 2.1751e-04
Loss = 9.7702e-02, PNorm = 82.8443, GNorm = 0.4230, lr_0 = 2.1736e-04
Loss = 9.2034e-02, PNorm = 82.8501, GNorm = 0.5202, lr_0 = 2.1721e-04
Loss = 1.0830e-01, PNorm = 82.8535, GNorm = 0.5825, lr_0 = 2.1706e-04
Loss = 8.8444e-02, PNorm = 82.8583, GNorm = 0.4701, lr_0 = 2.1691e-04
Loss = 9.0800e-02, PNorm = 82.8620, GNorm = 0.5087, lr_0 = 2.1676e-04
Loss = 1.0254e-01, PNorm = 82.8638, GNorm = 0.6419, lr_0 = 2.1661e-04
Loss = 9.9811e-02, PNorm = 82.8675, GNorm = 0.7097, lr_0 = 2.1646e-04
Loss = 1.0014e-01, PNorm = 82.8723, GNorm = 0.7481, lr_0 = 2.1632e-04
Loss = 7.8213e-02, PNorm = 82.8743, GNorm = 0.8173, lr_0 = 2.1617e-04
Loss = 8.9493e-02, PNorm = 82.8781, GNorm = 0.6299, lr_0 = 2.1602e-04
Loss = 9.4637e-02, PNorm = 82.8868, GNorm = 0.9262, lr_0 = 2.1587e-04
Loss = 9.3704e-02, PNorm = 82.8899, GNorm = 0.7978, lr_0 = 2.1572e-04
Loss = 8.1066e-02, PNorm = 82.8921, GNorm = 0.6142, lr_0 = 2.1558e-04
Loss = 9.8910e-02, PNorm = 82.8958, GNorm = 0.5874, lr_0 = 2.1543e-04
Loss = 8.4064e-02, PNorm = 82.8986, GNorm = 0.5848, lr_0 = 2.1528e-04
Loss = 9.3244e-02, PNorm = 82.9031, GNorm = 0.6335, lr_0 = 2.1513e-04
Loss = 9.8641e-02, PNorm = 82.9082, GNorm = 0.8186, lr_0 = 2.1499e-04
Loss = 9.4311e-02, PNorm = 82.9120, GNorm = 0.7635, lr_0 = 2.1484e-04
Loss = 9.5283e-02, PNorm = 82.9174, GNorm = 0.7206, lr_0 = 2.1469e-04
Loss = 1.0292e-01, PNorm = 82.9171, GNorm = 0.6537, lr_0 = 2.1454e-04
Loss = 1.0338e-01, PNorm = 82.9208, GNorm = 0.6646, lr_0 = 2.1440e-04
Loss = 9.3880e-02, PNorm = 82.9275, GNorm = 0.4958, lr_0 = 2.1425e-04
Loss = 9.0367e-02, PNorm = 82.9324, GNorm = 0.5506, lr_0 = 2.1410e-04
Loss = 8.7369e-02, PNorm = 82.9361, GNorm = 0.4414, lr_0 = 2.1396e-04
Loss = 9.5822e-02, PNorm = 82.9400, GNorm = 0.5371, lr_0 = 2.1381e-04
Loss = 1.0274e-01, PNorm = 82.9458, GNorm = 0.5944, lr_0 = 2.1366e-04
Loss = 9.5467e-02, PNorm = 82.9499, GNorm = 0.7805, lr_0 = 2.1352e-04
Loss = 9.8181e-02, PNorm = 82.9563, GNorm = 0.6590, lr_0 = 2.1337e-04
Loss = 1.0130e-01, PNorm = 82.9622, GNorm = 0.5587, lr_0 = 2.1323e-04
Loss = 9.0388e-02, PNorm = 82.9652, GNorm = 0.6798, lr_0 = 2.1308e-04
Loss = 9.9285e-02, PNorm = 82.9686, GNorm = 0.6501, lr_0 = 2.1293e-04
Loss = 8.9583e-02, PNorm = 82.9707, GNorm = 0.6862, lr_0 = 2.1279e-04
Loss = 8.0652e-02, PNorm = 82.9738, GNorm = 0.6011, lr_0 = 2.1264e-04
Loss = 8.7668e-02, PNorm = 82.9783, GNorm = 0.5449, lr_0 = 2.1250e-04
Loss = 8.9849e-02, PNorm = 82.9801, GNorm = 0.5498, lr_0 = 2.1235e-04
Loss = 9.9236e-02, PNorm = 82.9844, GNorm = 0.8087, lr_0 = 2.1221e-04
Loss = 8.1429e-02, PNorm = 82.9848, GNorm = 0.5040, lr_0 = 2.1206e-04
Loss = 9.9965e-02, PNorm = 82.9887, GNorm = 0.5239, lr_0 = 2.1191e-04
Loss = 9.3949e-02, PNorm = 82.9957, GNorm = 0.5810, lr_0 = 2.1177e-04
Loss = 9.1902e-02, PNorm = 83.0009, GNorm = 1.1362, lr_0 = 2.1162e-04
Loss = 8.7677e-02, PNorm = 83.0030, GNorm = 0.6018, lr_0 = 2.1148e-04
Loss = 9.4957e-02, PNorm = 83.0061, GNorm = 0.5810, lr_0 = 2.1133e-04
Loss = 8.9439e-02, PNorm = 83.0080, GNorm = 0.8066, lr_0 = 2.1119e-04
Loss = 9.2457e-02, PNorm = 83.0098, GNorm = 0.6895, lr_0 = 2.1104e-04
Loss = 8.5673e-02, PNorm = 83.0138, GNorm = 0.7222, lr_0 = 2.1090e-04
Loss = 9.7643e-02, PNorm = 83.0160, GNorm = 0.8356, lr_0 = 2.1076e-04
Loss = 9.7288e-02, PNorm = 83.0182, GNorm = 0.6406, lr_0 = 2.1061e-04
Loss = 8.6886e-02, PNorm = 83.0208, GNorm = 0.6449, lr_0 = 2.1047e-04
Loss = 8.8555e-02, PNorm = 83.0258, GNorm = 1.0889, lr_0 = 2.1032e-04
Loss = 1.0427e-01, PNorm = 83.0286, GNorm = 0.6772, lr_0 = 2.1018e-04
Loss = 9.0950e-02, PNorm = 83.0350, GNorm = 0.6901, lr_0 = 2.1003e-04
Loss = 1.0267e-01, PNorm = 83.0438, GNorm = 0.7407, lr_0 = 2.0989e-04
Loss = 1.0918e-01, PNorm = 83.0470, GNorm = 0.6683, lr_0 = 2.0975e-04
Loss = 8.9380e-02, PNorm = 83.0504, GNorm = 0.5366, lr_0 = 2.0960e-04
Validation mae = 0.229062
Epoch 21
Loss = 9.3581e-02, PNorm = 83.0562, GNorm = 0.9041, lr_0 = 2.0946e-04
Loss = 9.7930e-02, PNorm = 83.0619, GNorm = 0.5645, lr_0 = 2.0932e-04
Loss = 8.4893e-02, PNorm = 83.0674, GNorm = 0.5245, lr_0 = 2.0917e-04
Loss = 8.8963e-02, PNorm = 83.0745, GNorm = 0.6323, lr_0 = 2.0903e-04
Loss = 9.2856e-02, PNorm = 83.0797, GNorm = 0.6707, lr_0 = 2.0889e-04
Loss = 8.6396e-02, PNorm = 83.0804, GNorm = 0.4756, lr_0 = 2.0874e-04
Loss = 8.8307e-02, PNorm = 83.0831, GNorm = 0.5794, lr_0 = 2.0860e-04
Loss = 7.8257e-02, PNorm = 83.0866, GNorm = 0.4165, lr_0 = 2.0846e-04
Loss = 9.2302e-02, PNorm = 83.0922, GNorm = 0.7264, lr_0 = 2.0831e-04
Loss = 8.1906e-02, PNorm = 83.0926, GNorm = 0.5057, lr_0 = 2.0817e-04
Loss = 8.9868e-02, PNorm = 83.0950, GNorm = 0.5533, lr_0 = 2.0803e-04
Loss = 9.5714e-02, PNorm = 83.0989, GNorm = 0.9513, lr_0 = 2.0789e-04
Loss = 8.9788e-02, PNorm = 83.1023, GNorm = 0.6503, lr_0 = 2.0774e-04
Loss = 9.6873e-02, PNorm = 83.1075, GNorm = 1.1228, lr_0 = 2.0760e-04
Loss = 9.6404e-02, PNorm = 83.1113, GNorm = 0.6926, lr_0 = 2.0746e-04
Loss = 9.2129e-02, PNorm = 83.1133, GNorm = 0.6751, lr_0 = 2.0732e-04
Loss = 9.9932e-02, PNorm = 83.1163, GNorm = 0.7657, lr_0 = 2.0718e-04
Loss = 8.8742e-02, PNorm = 83.1197, GNorm = 0.9534, lr_0 = 2.0703e-04
Loss = 9.2762e-02, PNorm = 83.1233, GNorm = 0.6544, lr_0 = 2.0689e-04
Loss = 8.6651e-02, PNorm = 83.1274, GNorm = 0.8889, lr_0 = 2.0675e-04
Loss = 9.2631e-02, PNorm = 83.1312, GNorm = 0.4774, lr_0 = 2.0661e-04
Loss = 8.2291e-02, PNorm = 83.1366, GNorm = 0.7699, lr_0 = 2.0647e-04
Loss = 8.9840e-02, PNorm = 83.1413, GNorm = 0.7028, lr_0 = 2.0633e-04
Loss = 9.6798e-02, PNorm = 83.1455, GNorm = 0.7984, lr_0 = 2.0618e-04
Loss = 9.4502e-02, PNorm = 83.1459, GNorm = 0.7504, lr_0 = 2.0604e-04
Loss = 7.4355e-02, PNorm = 83.1500, GNorm = 0.5858, lr_0 = 2.0590e-04
Loss = 8.4140e-02, PNorm = 83.1531, GNorm = 0.9464, lr_0 = 2.0576e-04
Loss = 8.7934e-02, PNorm = 83.1533, GNorm = 0.6652, lr_0 = 2.0562e-04
Loss = 9.2882e-02, PNorm = 83.1564, GNorm = 0.5720, lr_0 = 2.0548e-04
Loss = 9.2269e-02, PNorm = 83.1573, GNorm = 0.6010, lr_0 = 2.0534e-04
Loss = 9.3815e-02, PNorm = 83.1586, GNorm = 0.6036, lr_0 = 2.0520e-04
Loss = 8.2279e-02, PNorm = 83.1626, GNorm = 0.6077, lr_0 = 2.0506e-04
Loss = 8.6821e-02, PNorm = 83.1669, GNorm = 0.4635, lr_0 = 2.0492e-04
Loss = 7.7538e-02, PNorm = 83.1709, GNorm = 0.5191, lr_0 = 2.0478e-04
Loss = 8.9948e-02, PNorm = 83.1775, GNorm = 0.8563, lr_0 = 2.0464e-04
Loss = 8.8782e-02, PNorm = 83.1812, GNorm = 0.5467, lr_0 = 2.0450e-04
Loss = 9.9599e-02, PNorm = 83.1849, GNorm = 0.7843, lr_0 = 2.0436e-04
Loss = 9.5778e-02, PNorm = 83.1911, GNorm = 0.6289, lr_0 = 2.0422e-04
Loss = 8.1917e-02, PNorm = 83.1965, GNorm = 0.5648, lr_0 = 2.0408e-04
Loss = 8.8387e-02, PNorm = 83.1992, GNorm = 0.5717, lr_0 = 2.0394e-04
Loss = 7.6862e-02, PNorm = 83.2024, GNorm = 0.6259, lr_0 = 2.0380e-04
Loss = 8.7131e-02, PNorm = 83.2057, GNorm = 0.6996, lr_0 = 2.0366e-04
Loss = 8.7506e-02, PNorm = 83.2053, GNorm = 0.5359, lr_0 = 2.0352e-04
Loss = 9.7211e-02, PNorm = 83.2074, GNorm = 0.5602, lr_0 = 2.0338e-04
Loss = 8.0831e-02, PNorm = 83.2102, GNorm = 0.4696, lr_0 = 2.0324e-04
Loss = 7.5578e-02, PNorm = 83.2152, GNorm = 0.7259, lr_0 = 2.0310e-04
Loss = 8.8499e-02, PNorm = 83.2195, GNorm = 0.5859, lr_0 = 2.0296e-04
Loss = 8.9673e-02, PNorm = 83.2253, GNorm = 0.4825, lr_0 = 2.0282e-04
Loss = 7.9976e-02, PNorm = 83.2282, GNorm = 0.5296, lr_0 = 2.0268e-04
Loss = 8.6976e-02, PNorm = 83.2330, GNorm = 1.0188, lr_0 = 2.0254e-04
Loss = 8.7426e-02, PNorm = 83.2397, GNorm = 0.7541, lr_0 = 2.0240e-04
Loss = 8.9273e-02, PNorm = 83.2441, GNorm = 0.6151, lr_0 = 2.0227e-04
Loss = 8.5467e-02, PNorm = 83.2469, GNorm = 0.6982, lr_0 = 2.0213e-04
Loss = 7.7189e-02, PNorm = 83.2510, GNorm = 0.5573, lr_0 = 2.0199e-04
Loss = 8.3052e-02, PNorm = 83.2549, GNorm = 0.6727, lr_0 = 2.0185e-04
Loss = 9.2532e-02, PNorm = 83.2546, GNorm = 0.8613, lr_0 = 2.0171e-04
Loss = 1.0371e-01, PNorm = 83.2555, GNorm = 0.6714, lr_0 = 2.0157e-04
Loss = 8.6081e-02, PNorm = 83.2573, GNorm = 0.6353, lr_0 = 2.0144e-04
Loss = 9.4782e-02, PNorm = 83.2597, GNorm = 0.6542, lr_0 = 2.0130e-04
Loss = 9.1884e-02, PNorm = 83.2610, GNorm = 0.5718, lr_0 = 2.0116e-04
Loss = 9.4231e-02, PNorm = 83.2671, GNorm = 0.8877, lr_0 = 2.0102e-04
Loss = 9.3868e-02, PNorm = 83.2719, GNorm = 0.7990, lr_0 = 2.0088e-04
Loss = 9.3337e-02, PNorm = 83.2795, GNorm = 0.6382, lr_0 = 2.0075e-04
Loss = 8.8961e-02, PNorm = 83.2824, GNorm = 0.6409, lr_0 = 2.0061e-04
Loss = 8.7308e-02, PNorm = 83.2873, GNorm = 0.4417, lr_0 = 2.0047e-04
Loss = 9.3544e-02, PNorm = 83.2913, GNorm = 0.8954, lr_0 = 2.0033e-04
Loss = 9.1358e-02, PNorm = 83.2934, GNorm = 0.7701, lr_0 = 2.0020e-04
Loss = 9.0744e-02, PNorm = 83.2954, GNorm = 0.6442, lr_0 = 2.0006e-04
Loss = 8.0218e-02, PNorm = 83.2982, GNorm = 0.7246, lr_0 = 1.9992e-04
Loss = 8.3490e-02, PNorm = 83.3014, GNorm = 0.5709, lr_0 = 1.9979e-04
Loss = 7.8422e-02, PNorm = 83.3053, GNorm = 0.6457, lr_0 = 1.9965e-04
Loss = 8.3205e-02, PNorm = 83.3095, GNorm = 0.7426, lr_0 = 1.9951e-04
Loss = 8.7005e-02, PNorm = 83.3141, GNorm = 0.7681, lr_0 = 1.9938e-04
Loss = 8.4687e-02, PNorm = 83.3170, GNorm = 0.9746, lr_0 = 1.9924e-04
Loss = 9.0796e-02, PNorm = 83.3205, GNorm = 0.7738, lr_0 = 1.9910e-04
Loss = 8.4464e-02, PNorm = 83.3243, GNorm = 0.8307, lr_0 = 1.9897e-04
Loss = 8.1770e-02, PNorm = 83.3294, GNorm = 0.6618, lr_0 = 1.9883e-04
Loss = 8.4885e-02, PNorm = 83.3305, GNorm = 0.4763, lr_0 = 1.9869e-04
Loss = 8.5253e-02, PNorm = 83.3325, GNorm = 0.7822, lr_0 = 1.9856e-04
Loss = 8.0323e-02, PNorm = 83.3387, GNorm = 0.7293, lr_0 = 1.9842e-04
Loss = 7.9043e-02, PNorm = 83.3430, GNorm = 0.5554, lr_0 = 1.9829e-04
Loss = 9.4739e-02, PNorm = 83.3450, GNorm = 0.6313, lr_0 = 1.9815e-04
Loss = 8.9568e-02, PNorm = 83.3472, GNorm = 0.6157, lr_0 = 1.9801e-04
Loss = 8.6850e-02, PNorm = 83.3488, GNorm = 0.7242, lr_0 = 1.9788e-04
Loss = 8.1592e-02, PNorm = 83.3500, GNorm = 0.5138, lr_0 = 1.9774e-04
Loss = 8.8262e-02, PNorm = 83.3561, GNorm = 0.5540, lr_0 = 1.9761e-04
Loss = 8.3014e-02, PNorm = 83.3592, GNorm = 0.7578, lr_0 = 1.9747e-04
Loss = 9.4341e-02, PNorm = 83.3620, GNorm = 0.6934, lr_0 = 1.9734e-04
Loss = 9.0015e-02, PNorm = 83.3639, GNorm = 0.5162, lr_0 = 1.9720e-04
Loss = 8.2159e-02, PNorm = 83.3640, GNorm = 0.4283, lr_0 = 1.9707e-04
Loss = 8.2463e-02, PNorm = 83.3662, GNorm = 0.8148, lr_0 = 1.9693e-04
Loss = 9.9602e-02, PNorm = 83.3684, GNorm = 0.7007, lr_0 = 1.9680e-04
Loss = 8.3388e-02, PNorm = 83.3757, GNorm = 0.5906, lr_0 = 1.9666e-04
Loss = 8.8262e-02, PNorm = 83.3806, GNorm = 0.8226, lr_0 = 1.9653e-04
Loss = 7.6807e-02, PNorm = 83.3833, GNorm = 0.6970, lr_0 = 1.9639e-04
Loss = 9.8192e-02, PNorm = 83.3854, GNorm = 0.5208, lr_0 = 1.9626e-04
Loss = 9.4764e-02, PNorm = 83.3869, GNorm = 0.6242, lr_0 = 1.9612e-04
Loss = 8.3957e-02, PNorm = 83.3882, GNorm = 0.8890, lr_0 = 1.9599e-04
Loss = 8.9689e-02, PNorm = 83.3940, GNorm = 0.6655, lr_0 = 1.9585e-04
Loss = 1.0144e-01, PNorm = 83.3974, GNorm = 0.7081, lr_0 = 1.9572e-04
Loss = 8.3977e-02, PNorm = 83.4031, GNorm = 0.5979, lr_0 = 1.9559e-04
Loss = 9.2434e-02, PNorm = 83.4065, GNorm = 0.7096, lr_0 = 1.9545e-04
Loss = 9.4777e-02, PNorm = 83.4097, GNorm = 0.7804, lr_0 = 1.9532e-04
Loss = 9.5695e-02, PNorm = 83.4118, GNorm = 0.7562, lr_0 = 1.9518e-04
Loss = 8.9386e-02, PNorm = 83.4137, GNorm = 0.5363, lr_0 = 1.9505e-04
Loss = 9.0486e-02, PNorm = 83.4149, GNorm = 0.5315, lr_0 = 1.9492e-04
Loss = 1.0062e-01, PNorm = 83.4189, GNorm = 0.7980, lr_0 = 1.9478e-04
Loss = 7.9689e-02, PNorm = 83.4234, GNorm = 0.6238, lr_0 = 1.9465e-04
Loss = 9.2890e-02, PNorm = 83.4284, GNorm = 0.5134, lr_0 = 1.9452e-04
Loss = 8.4868e-02, PNorm = 83.4321, GNorm = 0.5266, lr_0 = 1.9438e-04
Loss = 9.3249e-02, PNorm = 83.4346, GNorm = 0.7760, lr_0 = 1.9425e-04
Loss = 7.9552e-02, PNorm = 83.4406, GNorm = 0.6346, lr_0 = 1.9412e-04
Loss = 8.9446e-02, PNorm = 83.4451, GNorm = 0.6941, lr_0 = 1.9398e-04
Loss = 8.0368e-02, PNorm = 83.4514, GNorm = 0.6413, lr_0 = 1.9385e-04
Loss = 9.2168e-02, PNorm = 83.4526, GNorm = 0.7160, lr_0 = 1.9372e-04
Loss = 8.6084e-02, PNorm = 83.4525, GNorm = 0.7062, lr_0 = 1.9359e-04
Loss = 1.0674e-01, PNorm = 83.4569, GNorm = 0.6359, lr_0 = 1.9345e-04
Loss = 1.0374e-01, PNorm = 83.4630, GNorm = 0.5734, lr_0 = 1.9332e-04
Loss = 8.4302e-02, PNorm = 83.4645, GNorm = 0.6863, lr_0 = 1.9319e-04
Loss = 9.5814e-02, PNorm = 83.4663, GNorm = 0.5265, lr_0 = 1.9306e-04
Validation mae = 0.228984
Epoch 22
Loss = 8.5540e-02, PNorm = 83.4715, GNorm = 0.8188, lr_0 = 1.9292e-04
Loss = 8.3481e-02, PNorm = 83.4778, GNorm = 0.7208, lr_0 = 1.9279e-04
Loss = 9.4788e-02, PNorm = 83.4831, GNorm = 0.6883, lr_0 = 1.9266e-04
Loss = 8.2307e-02, PNorm = 83.4884, GNorm = 0.4991, lr_0 = 1.9253e-04
Loss = 8.0867e-02, PNorm = 83.4905, GNorm = 0.5924, lr_0 = 1.9240e-04
Loss = 8.4841e-02, PNorm = 83.4925, GNorm = 0.5588, lr_0 = 1.9226e-04
Loss = 9.3389e-02, PNorm = 83.4977, GNorm = 0.6586, lr_0 = 1.9213e-04
Loss = 7.4961e-02, PNorm = 83.5006, GNorm = 0.6034, lr_0 = 1.9200e-04
Loss = 8.1773e-02, PNorm = 83.5044, GNorm = 0.6443, lr_0 = 1.9187e-04
Loss = 8.5491e-02, PNorm = 83.5093, GNorm = 0.5787, lr_0 = 1.9174e-04
Loss = 8.7326e-02, PNorm = 83.5120, GNorm = 1.0782, lr_0 = 1.9161e-04
Loss = 7.5651e-02, PNorm = 83.5129, GNorm = 0.6227, lr_0 = 1.9148e-04
Loss = 8.9300e-02, PNorm = 83.5178, GNorm = 0.5423, lr_0 = 1.9134e-04
Loss = 8.4317e-02, PNorm = 83.5198, GNorm = 0.7398, lr_0 = 1.9121e-04
Loss = 6.9016e-02, PNorm = 83.5212, GNorm = 0.6498, lr_0 = 1.9108e-04
Loss = 7.7852e-02, PNorm = 83.5238, GNorm = 0.6601, lr_0 = 1.9095e-04
Loss = 8.0292e-02, PNorm = 83.5262, GNorm = 0.5826, lr_0 = 1.9082e-04
Loss = 8.7393e-02, PNorm = 83.5280, GNorm = 0.5781, lr_0 = 1.9069e-04
Loss = 7.8873e-02, PNorm = 83.5323, GNorm = 0.8573, lr_0 = 1.9056e-04
Loss = 8.3943e-02, PNorm = 83.5358, GNorm = 0.6120, lr_0 = 1.9043e-04
Loss = 9.4482e-02, PNorm = 83.5367, GNorm = 0.8572, lr_0 = 1.9030e-04
Loss = 7.9876e-02, PNorm = 83.5413, GNorm = 0.5328, lr_0 = 1.9017e-04
Loss = 8.7524e-02, PNorm = 83.5459, GNorm = 0.4495, lr_0 = 1.9004e-04
Loss = 7.6275e-02, PNorm = 83.5514, GNorm = 0.4563, lr_0 = 1.8991e-04
Loss = 7.6213e-02, PNorm = 83.5541, GNorm = 0.4650, lr_0 = 1.8978e-04
Loss = 9.0314e-02, PNorm = 83.5566, GNorm = 0.5179, lr_0 = 1.8965e-04
Loss = 7.8474e-02, PNorm = 83.5601, GNorm = 0.8318, lr_0 = 1.8952e-04
Loss = 8.2594e-02, PNorm = 83.5633, GNorm = 0.5628, lr_0 = 1.8939e-04
Loss = 8.3661e-02, PNorm = 83.5656, GNorm = 0.4587, lr_0 = 1.8926e-04
Loss = 9.5984e-02, PNorm = 83.5682, GNorm = 0.6563, lr_0 = 1.8913e-04
Loss = 1.0037e-01, PNorm = 83.5727, GNorm = 0.7806, lr_0 = 1.8900e-04
Loss = 9.3623e-02, PNorm = 83.5770, GNorm = 0.7178, lr_0 = 1.8887e-04
Loss = 8.9606e-02, PNorm = 83.5818, GNorm = 0.6852, lr_0 = 1.8874e-04
Loss = 8.7460e-02, PNorm = 83.5862, GNorm = 0.8114, lr_0 = 1.8861e-04
Loss = 8.5415e-02, PNorm = 83.5892, GNorm = 0.7830, lr_0 = 1.8848e-04
Loss = 8.6000e-02, PNorm = 83.5898, GNorm = 0.6041, lr_0 = 1.8835e-04
Loss = 8.7713e-02, PNorm = 83.5903, GNorm = 0.4866, lr_0 = 1.8822e-04
Loss = 8.5866e-02, PNorm = 83.5928, GNorm = 0.5086, lr_0 = 1.8809e-04
Loss = 8.6929e-02, PNorm = 83.5962, GNorm = 0.4861, lr_0 = 1.8797e-04
Loss = 9.3857e-02, PNorm = 83.5980, GNorm = 0.5249, lr_0 = 1.8784e-04
Loss = 9.2448e-02, PNorm = 83.6008, GNorm = 0.6565, lr_0 = 1.8771e-04
Loss = 8.6611e-02, PNorm = 83.6070, GNorm = 0.5450, lr_0 = 1.8758e-04
Loss = 8.1555e-02, PNorm = 83.6108, GNorm = 0.6087, lr_0 = 1.8745e-04
Loss = 7.7105e-02, PNorm = 83.6125, GNorm = 0.6087, lr_0 = 1.8732e-04
Loss = 8.1773e-02, PNorm = 83.6161, GNorm = 0.8139, lr_0 = 1.8719e-04
Loss = 7.9651e-02, PNorm = 83.6193, GNorm = 0.6193, lr_0 = 1.8707e-04
Loss = 8.4155e-02, PNorm = 83.6218, GNorm = 0.7220, lr_0 = 1.8694e-04
Loss = 8.2999e-02, PNorm = 83.6260, GNorm = 0.5856, lr_0 = 1.8681e-04
Loss = 1.0087e-01, PNorm = 83.6281, GNorm = 0.6335, lr_0 = 1.8668e-04
Loss = 7.7678e-02, PNorm = 83.6287, GNorm = 0.6693, lr_0 = 1.8655e-04
Loss = 8.6935e-02, PNorm = 83.6322, GNorm = 1.0409, lr_0 = 1.8643e-04
Loss = 8.3974e-02, PNorm = 83.6350, GNorm = 0.6781, lr_0 = 1.8630e-04
Loss = 1.0370e-01, PNorm = 83.6394, GNorm = 0.8353, lr_0 = 1.8617e-04
Loss = 8.9440e-02, PNorm = 83.6419, GNorm = 0.6102, lr_0 = 1.8604e-04
Loss = 8.7717e-02, PNorm = 83.6431, GNorm = 0.6147, lr_0 = 1.8592e-04
Loss = 8.3484e-02, PNorm = 83.6478, GNorm = 0.6507, lr_0 = 1.8579e-04
Loss = 7.0457e-02, PNorm = 83.6527, GNorm = 0.6815, lr_0 = 1.8566e-04
Loss = 8.1000e-02, PNorm = 83.6574, GNorm = 0.5441, lr_0 = 1.8553e-04
Loss = 9.8761e-02, PNorm = 83.6639, GNorm = 0.6302, lr_0 = 1.8541e-04
Loss = 8.2536e-02, PNorm = 83.6650, GNorm = 0.5654, lr_0 = 1.8528e-04
Loss = 7.7201e-02, PNorm = 83.6658, GNorm = 0.6010, lr_0 = 1.8515e-04
Loss = 8.6056e-02, PNorm = 83.6688, GNorm = 0.5295, lr_0 = 1.8503e-04
Loss = 8.6862e-02, PNorm = 83.6716, GNorm = 0.5216, lr_0 = 1.8490e-04
Loss = 8.6502e-02, PNorm = 83.6746, GNorm = 0.6687, lr_0 = 1.8477e-04
Loss = 8.7109e-02, PNorm = 83.6776, GNorm = 0.7817, lr_0 = 1.8465e-04
Loss = 9.5371e-02, PNorm = 83.6792, GNorm = 0.6783, lr_0 = 1.8452e-04
Loss = 9.9288e-02, PNorm = 83.6816, GNorm = 0.8036, lr_0 = 1.8439e-04
Loss = 8.7367e-02, PNorm = 83.6840, GNorm = 0.8196, lr_0 = 1.8427e-04
Loss = 7.6277e-02, PNorm = 83.6897, GNorm = 0.6052, lr_0 = 1.8414e-04
Loss = 8.3422e-02, PNorm = 83.6912, GNorm = 0.3918, lr_0 = 1.8401e-04
Loss = 1.0295e-01, PNorm = 83.6973, GNorm = 1.0675, lr_0 = 1.8389e-04
Loss = 8.6913e-02, PNorm = 83.7009, GNorm = 0.8211, lr_0 = 1.8376e-04
Loss = 9.7309e-02, PNorm = 83.7037, GNorm = 0.6731, lr_0 = 1.8364e-04
Loss = 1.0266e-01, PNorm = 83.7049, GNorm = 1.0921, lr_0 = 1.8351e-04
Loss = 8.6845e-02, PNorm = 83.7080, GNorm = 0.8002, lr_0 = 1.8338e-04
Loss = 8.7303e-02, PNorm = 83.7121, GNorm = 0.6920, lr_0 = 1.8326e-04
Loss = 8.7760e-02, PNorm = 83.7162, GNorm = 0.7735, lr_0 = 1.8313e-04
Loss = 7.9786e-02, PNorm = 83.7202, GNorm = 0.6098, lr_0 = 1.8301e-04
Loss = 8.3557e-02, PNorm = 83.7241, GNorm = 0.6336, lr_0 = 1.8288e-04
Loss = 8.2518e-02, PNorm = 83.7266, GNorm = 0.4645, lr_0 = 1.8276e-04
Loss = 8.2917e-02, PNorm = 83.7295, GNorm = 0.5493, lr_0 = 1.8263e-04
Loss = 1.0055e-01, PNorm = 83.7343, GNorm = 0.7531, lr_0 = 1.8251e-04
Loss = 8.2206e-02, PNorm = 83.7383, GNorm = 0.9906, lr_0 = 1.8238e-04
Loss = 8.8242e-02, PNorm = 83.7436, GNorm = 0.6167, lr_0 = 1.8226e-04
Loss = 8.7089e-02, PNorm = 83.7460, GNorm = 0.8153, lr_0 = 1.8213e-04
Loss = 9.1427e-02, PNorm = 83.7477, GNorm = 0.6705, lr_0 = 1.8201e-04
Loss = 1.0459e-01, PNorm = 83.7492, GNorm = 0.7646, lr_0 = 1.8188e-04
Loss = 8.8599e-02, PNorm = 83.7525, GNorm = 0.6024, lr_0 = 1.8176e-04
Loss = 8.5669e-02, PNorm = 83.7557, GNorm = 0.7117, lr_0 = 1.8163e-04
Loss = 1.0205e-01, PNorm = 83.7594, GNorm = 0.8451, lr_0 = 1.8151e-04
Loss = 8.0796e-02, PNorm = 83.7645, GNorm = 0.7819, lr_0 = 1.8138e-04
Loss = 8.6876e-02, PNorm = 83.7688, GNorm = 0.7306, lr_0 = 1.8126e-04
Loss = 8.8428e-02, PNorm = 83.7721, GNorm = 0.6690, lr_0 = 1.8114e-04
Loss = 8.9845e-02, PNorm = 83.7738, GNorm = 0.7541, lr_0 = 1.8101e-04
Loss = 7.5375e-02, PNorm = 83.7766, GNorm = 0.5532, lr_0 = 1.8089e-04
Loss = 8.6587e-02, PNorm = 83.7792, GNorm = 0.5784, lr_0 = 1.8076e-04
Loss = 8.2721e-02, PNorm = 83.7801, GNorm = 0.7737, lr_0 = 1.8064e-04
Loss = 8.0561e-02, PNorm = 83.7817, GNorm = 0.4776, lr_0 = 1.8052e-04
Loss = 8.8027e-02, PNorm = 83.7848, GNorm = 0.4966, lr_0 = 1.8039e-04
Loss = 8.4984e-02, PNorm = 83.7883, GNorm = 0.5544, lr_0 = 1.8027e-04
Loss = 8.9605e-02, PNorm = 83.7891, GNorm = 0.5273, lr_0 = 1.8015e-04
Loss = 8.2065e-02, PNorm = 83.7896, GNorm = 0.5350, lr_0 = 1.8002e-04
Loss = 8.4812e-02, PNorm = 83.7924, GNorm = 0.7529, lr_0 = 1.7990e-04
Loss = 7.5081e-02, PNorm = 83.7954, GNorm = 0.6245, lr_0 = 1.7978e-04
Loss = 8.3284e-02, PNorm = 83.7984, GNorm = 0.6564, lr_0 = 1.7965e-04
Loss = 8.9676e-02, PNorm = 83.8023, GNorm = 0.5761, lr_0 = 1.7953e-04
Loss = 8.9076e-02, PNorm = 83.8041, GNorm = 1.1715, lr_0 = 1.7941e-04
Loss = 8.1702e-02, PNorm = 83.8057, GNorm = 0.8013, lr_0 = 1.7928e-04
Loss = 8.0754e-02, PNorm = 83.8073, GNorm = 0.7887, lr_0 = 1.7916e-04
Loss = 7.9081e-02, PNorm = 83.8093, GNorm = 0.5973, lr_0 = 1.7904e-04
Loss = 8.7656e-02, PNorm = 83.8127, GNorm = 0.5774, lr_0 = 1.7892e-04
Loss = 9.4095e-02, PNorm = 83.8149, GNorm = 0.7806, lr_0 = 1.7879e-04
Loss = 8.3178e-02, PNorm = 83.8189, GNorm = 0.6560, lr_0 = 1.7867e-04
Loss = 9.4493e-02, PNorm = 83.8194, GNorm = 0.8676, lr_0 = 1.7855e-04
Loss = 9.2274e-02, PNorm = 83.8222, GNorm = 0.7921, lr_0 = 1.7843e-04
Loss = 9.9258e-02, PNorm = 83.8251, GNorm = 0.5812, lr_0 = 1.7830e-04
Loss = 9.5061e-02, PNorm = 83.8273, GNorm = 0.7008, lr_0 = 1.7818e-04
Loss = 7.3645e-02, PNorm = 83.8271, GNorm = 0.4494, lr_0 = 1.7806e-04
Loss = 8.8295e-02, PNorm = 83.8287, GNorm = 0.4776, lr_0 = 1.7794e-04
Loss = 9.3818e-02, PNorm = 83.8319, GNorm = 1.0128, lr_0 = 1.7782e-04
Validation mae = 0.226679
Epoch 23
Loss = 7.5841e-02, PNorm = 83.8373, GNorm = 0.5844, lr_0 = 1.7769e-04
Loss = 7.2638e-02, PNorm = 83.8407, GNorm = 0.5223, lr_0 = 1.7757e-04
Loss = 8.1794e-02, PNorm = 83.8431, GNorm = 0.5912, lr_0 = 1.7745e-04
Loss = 8.3207e-02, PNorm = 83.8476, GNorm = 0.5717, lr_0 = 1.7733e-04
Loss = 8.6342e-02, PNorm = 83.8507, GNorm = 0.5063, lr_0 = 1.7721e-04
Loss = 7.2460e-02, PNorm = 83.8523, GNorm = 0.6602, lr_0 = 1.7709e-04
Loss = 8.3582e-02, PNorm = 83.8571, GNorm = 0.5790, lr_0 = 1.7696e-04
Loss = 8.3088e-02, PNorm = 83.8620, GNorm = 0.6562, lr_0 = 1.7684e-04
Loss = 7.3071e-02, PNorm = 83.8628, GNorm = 0.5432, lr_0 = 1.7672e-04
Loss = 8.8153e-02, PNorm = 83.8649, GNorm = 0.7119, lr_0 = 1.7660e-04
Loss = 7.7997e-02, PNorm = 83.8717, GNorm = 0.6253, lr_0 = 1.7648e-04
Loss = 8.7226e-02, PNorm = 83.8756, GNorm = 0.5937, lr_0 = 1.7636e-04
Loss = 9.9652e-02, PNorm = 83.8780, GNorm = 0.6859, lr_0 = 1.7624e-04
Loss = 7.8659e-02, PNorm = 83.8805, GNorm = 0.5241, lr_0 = 1.7612e-04
Loss = 8.3823e-02, PNorm = 83.8836, GNorm = 0.7110, lr_0 = 1.7600e-04
Loss = 7.7983e-02, PNorm = 83.8847, GNorm = 0.6765, lr_0 = 1.7588e-04
Loss = 8.7941e-02, PNorm = 83.8855, GNorm = 0.7385, lr_0 = 1.7576e-04
Loss = 9.1842e-02, PNorm = 83.8884, GNorm = 0.6960, lr_0 = 1.7564e-04
Loss = 8.4299e-02, PNorm = 83.8923, GNorm = 0.5126, lr_0 = 1.7552e-04
Loss = 8.3388e-02, PNorm = 83.8951, GNorm = 0.5585, lr_0 = 1.7540e-04
Loss = 9.3057e-02, PNorm = 83.8976, GNorm = 0.5242, lr_0 = 1.7528e-04
Loss = 8.3190e-02, PNorm = 83.8985, GNorm = 0.5629, lr_0 = 1.7516e-04
Loss = 9.7340e-02, PNorm = 83.9032, GNorm = 0.9459, lr_0 = 1.7504e-04
Loss = 8.4790e-02, PNorm = 83.9082, GNorm = 0.6498, lr_0 = 1.7492e-04
Loss = 1.0265e-01, PNorm = 83.9119, GNorm = 0.9044, lr_0 = 1.7480e-04
Loss = 8.7800e-02, PNorm = 83.9155, GNorm = 0.5816, lr_0 = 1.7468e-04
Loss = 9.0259e-02, PNorm = 83.9178, GNorm = 0.7111, lr_0 = 1.7456e-04
Loss = 8.5046e-02, PNorm = 83.9203, GNorm = 0.5296, lr_0 = 1.7444e-04
Loss = 7.7822e-02, PNorm = 83.9225, GNorm = 0.6857, lr_0 = 1.7432e-04
Loss = 8.1301e-02, PNorm = 83.9243, GNorm = 0.6538, lr_0 = 1.7420e-04
Loss = 8.6454e-02, PNorm = 83.9263, GNorm = 0.5139, lr_0 = 1.7408e-04
Loss = 7.7088e-02, PNorm = 83.9279, GNorm = 0.4790, lr_0 = 1.7396e-04
Loss = 8.4396e-02, PNorm = 83.9306, GNorm = 0.5246, lr_0 = 1.7384e-04
Loss = 7.7153e-02, PNorm = 83.9359, GNorm = 0.8395, lr_0 = 1.7372e-04
Loss = 8.1755e-02, PNorm = 83.9387, GNorm = 0.5719, lr_0 = 1.7360e-04
Loss = 8.5407e-02, PNorm = 83.9399, GNorm = 0.8625, lr_0 = 1.7348e-04
Loss = 9.3971e-02, PNorm = 83.9447, GNorm = 0.6997, lr_0 = 1.7336e-04
Loss = 8.4993e-02, PNorm = 83.9484, GNorm = 0.6996, lr_0 = 1.7325e-04
Loss = 8.0300e-02, PNorm = 83.9543, GNorm = 0.7535, lr_0 = 1.7313e-04
Loss = 9.2219e-02, PNorm = 83.9561, GNorm = 0.9044, lr_0 = 1.7301e-04
Loss = 8.4819e-02, PNorm = 83.9583, GNorm = 0.7240, lr_0 = 1.7289e-04
Loss = 7.7514e-02, PNorm = 83.9605, GNorm = 0.5727, lr_0 = 1.7277e-04
Loss = 9.0409e-02, PNorm = 83.9627, GNorm = 0.4916, lr_0 = 1.7265e-04
Loss = 7.9233e-02, PNorm = 83.9650, GNorm = 0.5918, lr_0 = 1.7253e-04
Loss = 8.2984e-02, PNorm = 83.9690, GNorm = 0.7824, lr_0 = 1.7242e-04
Loss = 8.1061e-02, PNorm = 83.9727, GNorm = 0.7017, lr_0 = 1.7230e-04
Loss = 9.1927e-02, PNorm = 83.9731, GNorm = 0.5898, lr_0 = 1.7218e-04
Loss = 8.2628e-02, PNorm = 83.9761, GNorm = 0.7903, lr_0 = 1.7206e-04
Loss = 7.1294e-02, PNorm = 83.9811, GNorm = 0.4929, lr_0 = 1.7194e-04
Loss = 8.2554e-02, PNorm = 83.9835, GNorm = 0.6031, lr_0 = 1.7183e-04
Loss = 7.9429e-02, PNorm = 83.9821, GNorm = 0.6114, lr_0 = 1.7171e-04
Loss = 7.9784e-02, PNorm = 83.9851, GNorm = 0.4797, lr_0 = 1.7159e-04
Loss = 8.2447e-02, PNorm = 83.9886, GNorm = 0.5494, lr_0 = 1.7147e-04
Loss = 8.1829e-02, PNorm = 83.9917, GNorm = 0.6571, lr_0 = 1.7136e-04
Loss = 7.1301e-02, PNorm = 83.9932, GNorm = 0.5746, lr_0 = 1.7124e-04
Loss = 8.3751e-02, PNorm = 83.9954, GNorm = 0.7810, lr_0 = 1.7112e-04
Loss = 8.1646e-02, PNorm = 83.9993, GNorm = 0.4700, lr_0 = 1.7100e-04
Loss = 8.1334e-02, PNorm = 84.0046, GNorm = 0.7950, lr_0 = 1.7089e-04
Loss = 8.5230e-02, PNorm = 84.0069, GNorm = 0.5677, lr_0 = 1.7077e-04
Loss = 8.3731e-02, PNorm = 84.0078, GNorm = 0.7194, lr_0 = 1.7065e-04
Loss = 7.2885e-02, PNorm = 84.0098, GNorm = 0.5361, lr_0 = 1.7054e-04
Loss = 8.5518e-02, PNorm = 84.0127, GNorm = 0.7360, lr_0 = 1.7042e-04
Loss = 8.2868e-02, PNorm = 84.0134, GNorm = 0.7206, lr_0 = 1.7030e-04
Loss = 8.7816e-02, PNorm = 84.0156, GNorm = 0.7072, lr_0 = 1.7019e-04
Loss = 9.2057e-02, PNorm = 84.0201, GNorm = 0.9076, lr_0 = 1.7007e-04
Loss = 7.7090e-02, PNorm = 84.0232, GNorm = 0.4403, lr_0 = 1.6995e-04
Loss = 8.8207e-02, PNorm = 84.0251, GNorm = 0.7643, lr_0 = 1.6984e-04
Loss = 8.1640e-02, PNorm = 84.0280, GNorm = 0.6367, lr_0 = 1.6972e-04
Loss = 8.0382e-02, PNorm = 84.0313, GNorm = 0.9577, lr_0 = 1.6960e-04
Loss = 8.0804e-02, PNorm = 84.0356, GNorm = 0.6680, lr_0 = 1.6949e-04
Loss = 9.1973e-02, PNorm = 84.0381, GNorm = 0.5758, lr_0 = 1.6937e-04
Loss = 9.3208e-02, PNorm = 84.0419, GNorm = 0.6718, lr_0 = 1.6926e-04
Loss = 8.3252e-02, PNorm = 84.0446, GNorm = 0.4707, lr_0 = 1.6914e-04
Loss = 8.2268e-02, PNorm = 84.0474, GNorm = 0.6049, lr_0 = 1.6902e-04
Loss = 9.3760e-02, PNorm = 84.0501, GNorm = 0.5568, lr_0 = 1.6891e-04
Loss = 7.9641e-02, PNorm = 84.0523, GNorm = 0.5574, lr_0 = 1.6879e-04
Loss = 8.0782e-02, PNorm = 84.0556, GNorm = 0.5516, lr_0 = 1.6868e-04
Loss = 9.5736e-02, PNorm = 84.0568, GNorm = 1.0666, lr_0 = 1.6856e-04
Loss = 7.6366e-02, PNorm = 84.0599, GNorm = 0.6012, lr_0 = 1.6845e-04
Loss = 6.9376e-02, PNorm = 84.0621, GNorm = 0.6657, lr_0 = 1.6833e-04
Loss = 9.3715e-02, PNorm = 84.0623, GNorm = 0.5627, lr_0 = 1.6821e-04
Loss = 7.8242e-02, PNorm = 84.0655, GNorm = 0.5301, lr_0 = 1.6810e-04
Loss = 8.6139e-02, PNorm = 84.0688, GNorm = 0.6078, lr_0 = 1.6798e-04
Loss = 9.0042e-02, PNorm = 84.0710, GNorm = 0.6336, lr_0 = 1.6787e-04
Loss = 9.0443e-02, PNorm = 84.0749, GNorm = 0.6505, lr_0 = 1.6775e-04
Loss = 8.7041e-02, PNorm = 84.0775, GNorm = 0.5366, lr_0 = 1.6764e-04
Loss = 7.8113e-02, PNorm = 84.0806, GNorm = 0.5410, lr_0 = 1.6752e-04
Loss = 8.5950e-02, PNorm = 84.0838, GNorm = 0.4339, lr_0 = 1.6741e-04
Loss = 8.7747e-02, PNorm = 84.0865, GNorm = 0.6973, lr_0 = 1.6729e-04
Loss = 7.9683e-02, PNorm = 84.0906, GNorm = 0.6201, lr_0 = 1.6718e-04
Loss = 8.2967e-02, PNorm = 84.0924, GNorm = 0.6092, lr_0 = 1.6707e-04
Loss = 8.3916e-02, PNorm = 84.0947, GNorm = 0.7532, lr_0 = 1.6695e-04
Loss = 9.1063e-02, PNorm = 84.0981, GNorm = 0.7731, lr_0 = 1.6684e-04
Loss = 1.0673e-01, PNorm = 84.1004, GNorm = 1.1992, lr_0 = 1.6672e-04
Loss = 7.3364e-02, PNorm = 84.1033, GNorm = 0.5308, lr_0 = 1.6661e-04
Loss = 8.0553e-02, PNorm = 84.1066, GNorm = 0.4956, lr_0 = 1.6649e-04
Loss = 9.1204e-02, PNorm = 84.1081, GNorm = 0.5909, lr_0 = 1.6638e-04
Loss = 8.4078e-02, PNorm = 84.1118, GNorm = 0.8603, lr_0 = 1.6627e-04
Loss = 7.7125e-02, PNorm = 84.1132, GNorm = 0.7321, lr_0 = 1.6615e-04
Loss = 8.1828e-02, PNorm = 84.1150, GNorm = 0.7017, lr_0 = 1.6604e-04
Loss = 8.4655e-02, PNorm = 84.1163, GNorm = 0.5117, lr_0 = 1.6592e-04
Loss = 9.1276e-02, PNorm = 84.1191, GNorm = 0.7854, lr_0 = 1.6581e-04
Loss = 7.6583e-02, PNorm = 84.1199, GNorm = 0.5518, lr_0 = 1.6570e-04
Loss = 8.1691e-02, PNorm = 84.1219, GNorm = 0.7126, lr_0 = 1.6558e-04
Loss = 8.6868e-02, PNorm = 84.1228, GNorm = 0.6792, lr_0 = 1.6547e-04
Loss = 8.1446e-02, PNorm = 84.1253, GNorm = 0.6718, lr_0 = 1.6536e-04
Loss = 8.4623e-02, PNorm = 84.1271, GNorm = 0.9067, lr_0 = 1.6524e-04
Loss = 8.3442e-02, PNorm = 84.1319, GNorm = 0.5037, lr_0 = 1.6513e-04
Loss = 8.7741e-02, PNorm = 84.1364, GNorm = 0.6347, lr_0 = 1.6502e-04
Loss = 8.7610e-02, PNorm = 84.1393, GNorm = 0.8535, lr_0 = 1.6490e-04
Loss = 8.5477e-02, PNorm = 84.1427, GNorm = 0.6448, lr_0 = 1.6479e-04
Loss = 8.8266e-02, PNorm = 84.1451, GNorm = 0.8865, lr_0 = 1.6468e-04
Loss = 8.5434e-02, PNorm = 84.1468, GNorm = 0.5553, lr_0 = 1.6457e-04
Loss = 8.0964e-02, PNorm = 84.1484, GNorm = 0.4406, lr_0 = 1.6445e-04
Loss = 9.1152e-02, PNorm = 84.1521, GNorm = 0.8995, lr_0 = 1.6434e-04
Loss = 8.9224e-02, PNorm = 84.1540, GNorm = 0.6345, lr_0 = 1.6423e-04
Loss = 8.3751e-02, PNorm = 84.1595, GNorm = 0.5305, lr_0 = 1.6412e-04
Loss = 8.4731e-02, PNorm = 84.1636, GNorm = 0.6089, lr_0 = 1.6400e-04
Loss = 8.6084e-02, PNorm = 84.1648, GNorm = 0.6660, lr_0 = 1.6389e-04
Loss = 9.9608e-02, PNorm = 84.1667, GNorm = 0.6660, lr_0 = 1.6378e-04
Validation mae = 0.228262
Epoch 24
Loss = 7.9762e-02, PNorm = 84.1690, GNorm = 0.8157, lr_0 = 1.6367e-04
Loss = 7.2931e-02, PNorm = 84.1711, GNorm = 0.5113, lr_0 = 1.6355e-04
Loss = 8.1616e-02, PNorm = 84.1709, GNorm = 0.5282, lr_0 = 1.6344e-04
Loss = 7.6383e-02, PNorm = 84.1708, GNorm = 0.6785, lr_0 = 1.6333e-04
Loss = 8.7544e-02, PNorm = 84.1726, GNorm = 0.6830, lr_0 = 1.6322e-04
Loss = 6.7745e-02, PNorm = 84.1755, GNorm = 0.5219, lr_0 = 1.6311e-04
Loss = 7.3361e-02, PNorm = 84.1772, GNorm = 0.6384, lr_0 = 1.6299e-04
Loss = 7.4049e-02, PNorm = 84.1800, GNorm = 0.6788, lr_0 = 1.6288e-04
Loss = 8.3147e-02, PNorm = 84.1843, GNorm = 0.5866, lr_0 = 1.6277e-04
Loss = 8.8732e-02, PNorm = 84.1862, GNorm = 0.6317, lr_0 = 1.6266e-04
Loss = 7.6485e-02, PNorm = 84.1878, GNorm = 0.6099, lr_0 = 1.6255e-04
Loss = 7.6623e-02, PNorm = 84.1905, GNorm = 0.5760, lr_0 = 1.6244e-04
Loss = 7.3075e-02, PNorm = 84.1931, GNorm = 0.5877, lr_0 = 1.6233e-04
Loss = 8.1374e-02, PNorm = 84.1957, GNorm = 0.5784, lr_0 = 1.6221e-04
Loss = 7.8288e-02, PNorm = 84.1992, GNorm = 0.6173, lr_0 = 1.6210e-04
Loss = 7.0489e-02, PNorm = 84.1992, GNorm = 0.6190, lr_0 = 1.6199e-04
Loss = 8.0712e-02, PNorm = 84.2011, GNorm = 0.5163, lr_0 = 1.6188e-04
Loss = 8.1452e-02, PNorm = 84.2067, GNorm = 0.7210, lr_0 = 1.6177e-04
Loss = 7.7355e-02, PNorm = 84.2092, GNorm = 0.6274, lr_0 = 1.6166e-04
Loss = 7.1567e-02, PNorm = 84.2113, GNorm = 0.6951, lr_0 = 1.6155e-04
Loss = 7.9460e-02, PNorm = 84.2138, GNorm = 0.8637, lr_0 = 1.6144e-04
Loss = 7.2411e-02, PNorm = 84.2161, GNorm = 0.4625, lr_0 = 1.6133e-04
Loss = 7.7531e-02, PNorm = 84.2189, GNorm = 0.6074, lr_0 = 1.6122e-04
Loss = 8.9470e-02, PNorm = 84.2204, GNorm = 0.5815, lr_0 = 1.6111e-04
Loss = 8.2345e-02, PNorm = 84.2216, GNorm = 0.5530, lr_0 = 1.6100e-04
Loss = 8.0702e-02, PNorm = 84.2233, GNorm = 0.5423, lr_0 = 1.6089e-04
Loss = 8.8165e-02, PNorm = 84.2271, GNorm = 0.6961, lr_0 = 1.6078e-04
Loss = 8.5162e-02, PNorm = 84.2303, GNorm = 0.6486, lr_0 = 1.6067e-04
Loss = 9.2132e-02, PNorm = 84.2348, GNorm = 0.6081, lr_0 = 1.6056e-04
Loss = 8.0222e-02, PNorm = 84.2389, GNorm = 0.4712, lr_0 = 1.6045e-04
Loss = 8.1773e-02, PNorm = 84.2436, GNorm = 0.7230, lr_0 = 1.6034e-04
Loss = 8.2614e-02, PNorm = 84.2461, GNorm = 0.7233, lr_0 = 1.6023e-04
Loss = 8.8808e-02, PNorm = 84.2492, GNorm = 0.8629, lr_0 = 1.6012e-04
Loss = 8.3852e-02, PNorm = 84.2504, GNorm = 0.7585, lr_0 = 1.6001e-04
Loss = 8.1772e-02, PNorm = 84.2530, GNorm = 0.4882, lr_0 = 1.5990e-04
Loss = 8.0262e-02, PNorm = 84.2571, GNorm = 0.5961, lr_0 = 1.5979e-04
Loss = 8.0697e-02, PNorm = 84.2606, GNorm = 0.8328, lr_0 = 1.5968e-04
Loss = 8.8654e-02, PNorm = 84.2648, GNorm = 0.5744, lr_0 = 1.5957e-04
Loss = 8.7604e-02, PNorm = 84.2698, GNorm = 0.7586, lr_0 = 1.5946e-04
Loss = 1.0714e-01, PNorm = 84.2731, GNorm = 1.0018, lr_0 = 1.5935e-04
Loss = 7.4421e-02, PNorm = 84.2744, GNorm = 0.5948, lr_0 = 1.5924e-04
Loss = 8.4515e-02, PNorm = 84.2762, GNorm = 0.5521, lr_0 = 1.5913e-04
Loss = 8.1286e-02, PNorm = 84.2810, GNorm = 0.8320, lr_0 = 1.5902e-04
Loss = 8.2224e-02, PNorm = 84.2854, GNorm = 0.6323, lr_0 = 1.5891e-04
Loss = 7.4635e-02, PNorm = 84.2862, GNorm = 0.5669, lr_0 = 1.5880e-04
Loss = 8.0045e-02, PNorm = 84.2887, GNorm = 0.6035, lr_0 = 1.5870e-04
Loss = 7.5991e-02, PNorm = 84.2921, GNorm = 0.6500, lr_0 = 1.5859e-04
Loss = 8.1458e-02, PNorm = 84.2941, GNorm = 0.6714, lr_0 = 1.5848e-04
Loss = 7.8369e-02, PNorm = 84.2967, GNorm = 0.8805, lr_0 = 1.5837e-04
Loss = 8.9303e-02, PNorm = 84.3008, GNorm = 0.8869, lr_0 = 1.5826e-04
Loss = 8.3580e-02, PNorm = 84.3030, GNorm = 0.5018, lr_0 = 1.5815e-04
Loss = 7.4251e-02, PNorm = 84.3052, GNorm = 0.7785, lr_0 = 1.5804e-04
Loss = 8.5359e-02, PNorm = 84.3055, GNorm = 0.6981, lr_0 = 1.5794e-04
Loss = 8.6419e-02, PNorm = 84.3084, GNorm = 0.6592, lr_0 = 1.5783e-04
Loss = 7.3395e-02, PNorm = 84.3116, GNorm = 0.5467, lr_0 = 1.5772e-04
Loss = 8.4632e-02, PNorm = 84.3133, GNorm = 0.6942, lr_0 = 1.5761e-04
Loss = 7.9696e-02, PNorm = 84.3152, GNorm = 0.7969, lr_0 = 1.5750e-04
Loss = 8.8969e-02, PNorm = 84.3160, GNorm = 0.6430, lr_0 = 1.5740e-04
Loss = 7.7939e-02, PNorm = 84.3177, GNorm = 0.6346, lr_0 = 1.5729e-04
Loss = 8.6553e-02, PNorm = 84.3213, GNorm = 0.6854, lr_0 = 1.5718e-04
Loss = 7.9338e-02, PNorm = 84.3232, GNorm = 0.7842, lr_0 = 1.5707e-04
Loss = 7.8928e-02, PNorm = 84.3275, GNorm = 0.7455, lr_0 = 1.5697e-04
Loss = 8.0867e-02, PNorm = 84.3290, GNorm = 0.7389, lr_0 = 1.5686e-04
Loss = 8.6425e-02, PNorm = 84.3308, GNorm = 0.8075, lr_0 = 1.5675e-04
Loss = 8.3989e-02, PNorm = 84.3337, GNorm = 0.8190, lr_0 = 1.5664e-04
Loss = 8.3792e-02, PNorm = 84.3379, GNorm = 0.5283, lr_0 = 1.5654e-04
Loss = 7.8672e-02, PNorm = 84.3397, GNorm = 0.4596, lr_0 = 1.5643e-04
Loss = 7.4494e-02, PNorm = 84.3413, GNorm = 0.7416, lr_0 = 1.5632e-04
Loss = 8.9956e-02, PNorm = 84.3439, GNorm = 0.6376, lr_0 = 1.5621e-04
Loss = 6.9205e-02, PNorm = 84.3457, GNorm = 0.6045, lr_0 = 1.5611e-04
Loss = 8.3784e-02, PNorm = 84.3485, GNorm = 0.8431, lr_0 = 1.5600e-04
Loss = 8.4333e-02, PNorm = 84.3500, GNorm = 0.5462, lr_0 = 1.5589e-04
Loss = 8.2905e-02, PNorm = 84.3523, GNorm = 0.6235, lr_0 = 1.5579e-04
Loss = 7.9106e-02, PNorm = 84.3538, GNorm = 0.8166, lr_0 = 1.5568e-04
Loss = 8.3381e-02, PNorm = 84.3572, GNorm = 0.6402, lr_0 = 1.5557e-04
Loss = 8.5137e-02, PNorm = 84.3582, GNorm = 0.6624, lr_0 = 1.5547e-04
Loss = 7.8078e-02, PNorm = 84.3624, GNorm = 0.4901, lr_0 = 1.5536e-04
Loss = 7.8616e-02, PNorm = 84.3618, GNorm = 0.5957, lr_0 = 1.5525e-04
Loss = 8.9778e-02, PNorm = 84.3641, GNorm = 0.5719, lr_0 = 1.5515e-04
Loss = 8.5272e-02, PNorm = 84.3652, GNorm = 1.0308, lr_0 = 1.5504e-04
Loss = 8.1011e-02, PNorm = 84.3667, GNorm = 0.7122, lr_0 = 1.5493e-04
Loss = 8.4148e-02, PNorm = 84.3710, GNorm = 0.8380, lr_0 = 1.5483e-04
Loss = 7.8464e-02, PNorm = 84.3761, GNorm = 0.5426, lr_0 = 1.5472e-04
Loss = 8.7453e-02, PNorm = 84.3799, GNorm = 0.5806, lr_0 = 1.5462e-04
Loss = 8.4014e-02, PNorm = 84.3816, GNorm = 0.5960, lr_0 = 1.5451e-04
Loss = 8.3511e-02, PNorm = 84.3851, GNorm = 0.9522, lr_0 = 1.5440e-04
Loss = 9.0298e-02, PNorm = 84.3869, GNorm = 0.6954, lr_0 = 1.5430e-04
Loss = 8.7235e-02, PNorm = 84.3899, GNorm = 0.6667, lr_0 = 1.5419e-04
Loss = 8.1797e-02, PNorm = 84.3936, GNorm = 0.7003, lr_0 = 1.5409e-04
Loss = 8.7060e-02, PNorm = 84.3965, GNorm = 0.6391, lr_0 = 1.5398e-04
Loss = 8.4268e-02, PNorm = 84.3973, GNorm = 0.7237, lr_0 = 1.5388e-04
Loss = 8.8467e-02, PNorm = 84.3991, GNorm = 0.6064, lr_0 = 1.5377e-04
Loss = 9.2447e-02, PNorm = 84.4022, GNorm = 0.5089, lr_0 = 1.5367e-04
Loss = 7.8995e-02, PNorm = 84.4046, GNorm = 0.5502, lr_0 = 1.5356e-04
Loss = 8.4057e-02, PNorm = 84.4078, GNorm = 0.8457, lr_0 = 1.5346e-04
Loss = 9.2282e-02, PNorm = 84.4104, GNorm = 0.6603, lr_0 = 1.5335e-04
Loss = 8.0147e-02, PNorm = 84.4113, GNorm = 0.5318, lr_0 = 1.5325e-04
Loss = 8.7561e-02, PNorm = 84.4163, GNorm = 0.5195, lr_0 = 1.5314e-04
Loss = 8.1420e-02, PNorm = 84.4215, GNorm = 0.6882, lr_0 = 1.5304e-04
Loss = 1.0117e-01, PNorm = 84.4231, GNorm = 0.5935, lr_0 = 1.5293e-04
Loss = 8.0007e-02, PNorm = 84.4266, GNorm = 0.5483, lr_0 = 1.5283e-04
Loss = 8.8410e-02, PNorm = 84.4280, GNorm = 1.1046, lr_0 = 1.5272e-04
Loss = 9.0096e-02, PNorm = 84.4297, GNorm = 1.1573, lr_0 = 1.5262e-04
Loss = 8.8420e-02, PNorm = 84.4305, GNorm = 0.6116, lr_0 = 1.5251e-04
Loss = 7.2525e-02, PNorm = 84.4319, GNorm = 0.5276, lr_0 = 1.5241e-04
Loss = 8.2501e-02, PNorm = 84.4353, GNorm = 0.7324, lr_0 = 1.5230e-04
Loss = 8.2668e-02, PNorm = 84.4380, GNorm = 0.7091, lr_0 = 1.5220e-04
Loss = 8.6692e-02, PNorm = 84.4374, GNorm = 0.7688, lr_0 = 1.5209e-04
Loss = 7.7910e-02, PNorm = 84.4377, GNorm = 0.8163, lr_0 = 1.5199e-04
Loss = 8.0112e-02, PNorm = 84.4391, GNorm = 0.6726, lr_0 = 1.5189e-04
Loss = 7.7895e-02, PNorm = 84.4424, GNorm = 0.6475, lr_0 = 1.5178e-04
Loss = 9.1777e-02, PNorm = 84.4452, GNorm = 0.7235, lr_0 = 1.5168e-04
Loss = 8.5135e-02, PNorm = 84.4475, GNorm = 0.6283, lr_0 = 1.5157e-04
Loss = 8.3462e-02, PNorm = 84.4494, GNorm = 0.5140, lr_0 = 1.5147e-04
Loss = 9.3000e-02, PNorm = 84.4493, GNorm = 0.7451, lr_0 = 1.5137e-04
Loss = 9.2082e-02, PNorm = 84.4520, GNorm = 0.7112, lr_0 = 1.5126e-04
Loss = 7.7477e-02, PNorm = 84.4557, GNorm = 0.6566, lr_0 = 1.5116e-04
Loss = 8.1203e-02, PNorm = 84.4596, GNorm = 0.6231, lr_0 = 1.5106e-04
Loss = 8.9168e-02, PNorm = 84.4621, GNorm = 0.6842, lr_0 = 1.5095e-04
Loss = 8.2591e-02, PNorm = 84.4655, GNorm = 0.6448, lr_0 = 1.5085e-04
Validation mae = 0.226906
Epoch 25
Loss = 8.1564e-02, PNorm = 84.4679, GNorm = 0.6759, lr_0 = 1.5075e-04
Loss = 7.0650e-02, PNorm = 84.4696, GNorm = 0.7848, lr_0 = 1.5064e-04
Loss = 7.6425e-02, PNorm = 84.4712, GNorm = 0.5572, lr_0 = 1.5054e-04
Loss = 7.9380e-02, PNorm = 84.4732, GNorm = 0.5900, lr_0 = 1.5044e-04
Loss = 7.6551e-02, PNorm = 84.4760, GNorm = 0.6917, lr_0 = 1.5033e-04
Loss = 6.6082e-02, PNorm = 84.4784, GNorm = 0.5877, lr_0 = 1.5023e-04
Loss = 8.3411e-02, PNorm = 84.4808, GNorm = 0.4974, lr_0 = 1.5013e-04
Loss = 8.0932e-02, PNorm = 84.4847, GNorm = 0.6160, lr_0 = 1.5002e-04
Loss = 8.3530e-02, PNorm = 84.4865, GNorm = 0.5795, lr_0 = 1.4992e-04
Loss = 8.5678e-02, PNorm = 84.4902, GNorm = 0.5811, lr_0 = 1.4982e-04
Loss = 7.4427e-02, PNorm = 84.4918, GNorm = 0.7024, lr_0 = 1.4972e-04
Loss = 7.6059e-02, PNorm = 84.4935, GNorm = 0.7202, lr_0 = 1.4961e-04
Loss = 8.8297e-02, PNorm = 84.4968, GNorm = 0.5333, lr_0 = 1.4951e-04
Loss = 7.9182e-02, PNorm = 84.4988, GNorm = 0.5682, lr_0 = 1.4941e-04
Loss = 7.5183e-02, PNorm = 84.5013, GNorm = 0.5755, lr_0 = 1.4931e-04
Loss = 9.0567e-02, PNorm = 84.5052, GNorm = 0.4603, lr_0 = 1.4920e-04
Loss = 7.3503e-02, PNorm = 84.5071, GNorm = 0.5344, lr_0 = 1.4910e-04
Loss = 8.2786e-02, PNorm = 84.5101, GNorm = 0.5958, lr_0 = 1.4900e-04
Loss = 7.3554e-02, PNorm = 84.5142, GNorm = 0.5414, lr_0 = 1.4890e-04
Loss = 1.0224e-01, PNorm = 84.5169, GNorm = 0.5803, lr_0 = 1.4880e-04
Loss = 8.1872e-02, PNorm = 84.5166, GNorm = 0.5812, lr_0 = 1.4869e-04
Loss = 7.5374e-02, PNorm = 84.5173, GNorm = 0.6768, lr_0 = 1.4859e-04
Loss = 7.6312e-02, PNorm = 84.5197, GNorm = 0.5945, lr_0 = 1.4849e-04
Loss = 7.7942e-02, PNorm = 84.5217, GNorm = 0.6215, lr_0 = 1.4839e-04
Loss = 7.2704e-02, PNorm = 84.5234, GNorm = 0.8298, lr_0 = 1.4829e-04
Loss = 8.3119e-02, PNorm = 84.5268, GNorm = 0.7254, lr_0 = 1.4818e-04
Loss = 8.5243e-02, PNorm = 84.5283, GNorm = 0.8763, lr_0 = 1.4808e-04
Loss = 7.3180e-02, PNorm = 84.5317, GNorm = 0.5653, lr_0 = 1.4798e-04
Loss = 7.8841e-02, PNorm = 84.5352, GNorm = 0.6791, lr_0 = 1.4788e-04
Loss = 7.9810e-02, PNorm = 84.5355, GNorm = 0.6733, lr_0 = 1.4778e-04
Loss = 7.9464e-02, PNorm = 84.5360, GNorm = 0.8933, lr_0 = 1.4768e-04
Loss = 6.3750e-02, PNorm = 84.5381, GNorm = 0.5576, lr_0 = 1.4758e-04
Loss = 7.8584e-02, PNorm = 84.5418, GNorm = 0.7175, lr_0 = 1.4748e-04
Loss = 7.6405e-02, PNorm = 84.5428, GNorm = 0.5718, lr_0 = 1.4737e-04
Loss = 9.2735e-02, PNorm = 84.5447, GNorm = 0.5117, lr_0 = 1.4727e-04
Loss = 8.4599e-02, PNorm = 84.5478, GNorm = 0.6394, lr_0 = 1.4717e-04
Loss = 7.8042e-02, PNorm = 84.5506, GNorm = 0.5658, lr_0 = 1.4707e-04
Loss = 7.3444e-02, PNorm = 84.5540, GNorm = 0.5896, lr_0 = 1.4697e-04
Loss = 8.5701e-02, PNorm = 84.5559, GNorm = 0.8214, lr_0 = 1.4687e-04
Loss = 8.1915e-02, PNorm = 84.5572, GNorm = 0.6452, lr_0 = 1.4677e-04
Loss = 8.6699e-02, PNorm = 84.5596, GNorm = 0.4716, lr_0 = 1.4667e-04
Loss = 8.0522e-02, PNorm = 84.5621, GNorm = 0.6476, lr_0 = 1.4657e-04
Loss = 6.9951e-02, PNorm = 84.5632, GNorm = 0.5318, lr_0 = 1.4647e-04
Loss = 8.0310e-02, PNorm = 84.5644, GNorm = 0.5256, lr_0 = 1.4637e-04
Loss = 8.2958e-02, PNorm = 84.5669, GNorm = 0.7601, lr_0 = 1.4627e-04
Loss = 7.5613e-02, PNorm = 84.5687, GNorm = 0.6743, lr_0 = 1.4617e-04
Loss = 8.1986e-02, PNorm = 84.5680, GNorm = 0.6120, lr_0 = 1.4607e-04
Loss = 7.2602e-02, PNorm = 84.5714, GNorm = 0.6737, lr_0 = 1.4597e-04
Loss = 7.9236e-02, PNorm = 84.5737, GNorm = 0.7695, lr_0 = 1.4587e-04
Loss = 8.6267e-02, PNorm = 84.5730, GNorm = 0.7210, lr_0 = 1.4577e-04
Loss = 7.0330e-02, PNorm = 84.5735, GNorm = 0.5695, lr_0 = 1.4567e-04
Loss = 7.5801e-02, PNorm = 84.5752, GNorm = 0.8685, lr_0 = 1.4557e-04
Loss = 8.3053e-02, PNorm = 84.5766, GNorm = 0.6505, lr_0 = 1.4547e-04
Loss = 7.3999e-02, PNorm = 84.5785, GNorm = 0.6613, lr_0 = 1.4537e-04
Loss = 8.6352e-02, PNorm = 84.5805, GNorm = 0.5471, lr_0 = 1.4527e-04
Loss = 8.6244e-02, PNorm = 84.5843, GNorm = 0.7703, lr_0 = 1.4517e-04
Loss = 7.8853e-02, PNorm = 84.5857, GNorm = 0.4958, lr_0 = 1.4507e-04
Loss = 8.5436e-02, PNorm = 84.5869, GNorm = 0.7637, lr_0 = 1.4497e-04
Loss = 8.0290e-02, PNorm = 84.5895, GNorm = 0.5385, lr_0 = 1.4487e-04
Loss = 8.1049e-02, PNorm = 84.5920, GNorm = 0.6152, lr_0 = 1.4477e-04
Loss = 7.5345e-02, PNorm = 84.5937, GNorm = 0.8271, lr_0 = 1.4467e-04
Loss = 8.6715e-02, PNorm = 84.5945, GNorm = 0.7348, lr_0 = 1.4457e-04
Loss = 7.9888e-02, PNorm = 84.5966, GNorm = 0.6026, lr_0 = 1.4447e-04
Loss = 9.6369e-02, PNorm = 84.5982, GNorm = 0.7156, lr_0 = 1.4438e-04
Loss = 9.2615e-02, PNorm = 84.5996, GNorm = 0.6321, lr_0 = 1.4428e-04
Loss = 8.3879e-02, PNorm = 84.6021, GNorm = 0.4826, lr_0 = 1.4418e-04
Loss = 7.5173e-02, PNorm = 84.6065, GNorm = 0.7160, lr_0 = 1.4408e-04
Loss = 8.2118e-02, PNorm = 84.6111, GNorm = 0.5476, lr_0 = 1.4398e-04
Loss = 8.4257e-02, PNorm = 84.6145, GNorm = 0.5714, lr_0 = 1.4388e-04
Loss = 7.7180e-02, PNorm = 84.6172, GNorm = 0.5446, lr_0 = 1.4378e-04
Loss = 8.0315e-02, PNorm = 84.6209, GNorm = 0.6307, lr_0 = 1.4368e-04
Loss = 7.6239e-02, PNorm = 84.6235, GNorm = 0.6793, lr_0 = 1.4359e-04
Loss = 8.5220e-02, PNorm = 84.6255, GNorm = 0.6448, lr_0 = 1.4349e-04
Loss = 8.1180e-02, PNorm = 84.6285, GNorm = 0.6208, lr_0 = 1.4339e-04
Loss = 7.5676e-02, PNorm = 84.6308, GNorm = 0.6230, lr_0 = 1.4329e-04
Loss = 8.5050e-02, PNorm = 84.6338, GNorm = 0.5980, lr_0 = 1.4319e-04
Loss = 8.7108e-02, PNorm = 84.6359, GNorm = 0.8322, lr_0 = 1.4310e-04
Loss = 8.5574e-02, PNorm = 84.6381, GNorm = 0.7256, lr_0 = 1.4300e-04
Loss = 7.9133e-02, PNorm = 84.6406, GNorm = 0.5325, lr_0 = 1.4290e-04
Loss = 7.8502e-02, PNorm = 84.6416, GNorm = 0.6318, lr_0 = 1.4280e-04
Loss = 8.0860e-02, PNorm = 84.6430, GNorm = 0.9855, lr_0 = 1.4270e-04
Loss = 7.9361e-02, PNorm = 84.6469, GNorm = 0.6387, lr_0 = 1.4261e-04
Loss = 8.1283e-02, PNorm = 84.6508, GNorm = 0.4964, lr_0 = 1.4251e-04
Loss = 8.8503e-02, PNorm = 84.6532, GNorm = 0.6025, lr_0 = 1.4241e-04
Loss = 8.8744e-02, PNorm = 84.6555, GNorm = 0.4580, lr_0 = 1.4231e-04
Loss = 8.4745e-02, PNorm = 84.6574, GNorm = 0.6782, lr_0 = 1.4222e-04
Loss = 8.3216e-02, PNorm = 84.6586, GNorm = 0.6460, lr_0 = 1.4212e-04
Loss = 6.9509e-02, PNorm = 84.6597, GNorm = 0.6489, lr_0 = 1.4202e-04
Loss = 7.3265e-02, PNorm = 84.6610, GNorm = 0.4891, lr_0 = 1.4192e-04
Loss = 8.7514e-02, PNorm = 84.6647, GNorm = 0.5274, lr_0 = 1.4183e-04
Loss = 8.5334e-02, PNorm = 84.6682, GNorm = 0.6090, lr_0 = 1.4173e-04
Loss = 7.4328e-02, PNorm = 84.6705, GNorm = 0.7207, lr_0 = 1.4163e-04
Loss = 8.9195e-02, PNorm = 84.6723, GNorm = 0.6621, lr_0 = 1.4153e-04
Loss = 8.7278e-02, PNorm = 84.6723, GNorm = 0.6173, lr_0 = 1.4144e-04
Loss = 7.6181e-02, PNorm = 84.6740, GNorm = 0.6097, lr_0 = 1.4134e-04
Loss = 7.9091e-02, PNorm = 84.6764, GNorm = 0.6438, lr_0 = 1.4124e-04
Loss = 8.5625e-02, PNorm = 84.6774, GNorm = 0.5554, lr_0 = 1.4115e-04
Loss = 8.3243e-02, PNorm = 84.6779, GNorm = 0.5269, lr_0 = 1.4105e-04
Loss = 6.6427e-02, PNorm = 84.6776, GNorm = 0.4719, lr_0 = 1.4095e-04
Loss = 9.3558e-02, PNorm = 84.6772, GNorm = 0.6993, lr_0 = 1.4086e-04
Loss = 7.4356e-02, PNorm = 84.6791, GNorm = 0.9038, lr_0 = 1.4076e-04
Loss = 8.2053e-02, PNorm = 84.6822, GNorm = 0.6263, lr_0 = 1.4066e-04
Loss = 7.9672e-02, PNorm = 84.6846, GNorm = 0.6207, lr_0 = 1.4057e-04
Loss = 7.9120e-02, PNorm = 84.6851, GNorm = 0.5642, lr_0 = 1.4047e-04
Loss = 8.9691e-02, PNorm = 84.6886, GNorm = 0.5957, lr_0 = 1.4038e-04
Loss = 7.3764e-02, PNorm = 84.6894, GNorm = 0.5189, lr_0 = 1.4028e-04
Loss = 7.6817e-02, PNorm = 84.6904, GNorm = 0.5679, lr_0 = 1.4018e-04
Loss = 8.7726e-02, PNorm = 84.6928, GNorm = 0.6236, lr_0 = 1.4009e-04
Loss = 8.0727e-02, PNorm = 84.6957, GNorm = 0.6428, lr_0 = 1.3999e-04
Loss = 8.8805e-02, PNorm = 84.6973, GNorm = 0.5610, lr_0 = 1.3990e-04
Loss = 7.1787e-02, PNorm = 84.7003, GNorm = 0.7243, lr_0 = 1.3980e-04
Loss = 7.9145e-02, PNorm = 84.7017, GNorm = 0.4413, lr_0 = 1.3970e-04
Loss = 7.6489e-02, PNorm = 84.7043, GNorm = 0.5110, lr_0 = 1.3961e-04
Loss = 8.0216e-02, PNorm = 84.7063, GNorm = 0.5019, lr_0 = 1.3951e-04
Loss = 8.3310e-02, PNorm = 84.7088, GNorm = 0.8811, lr_0 = 1.3942e-04
Loss = 7.9274e-02, PNorm = 84.7103, GNorm = 0.5569, lr_0 = 1.3932e-04
Loss = 8.4938e-02, PNorm = 84.7131, GNorm = 0.7814, lr_0 = 1.3923e-04
Loss = 8.2886e-02, PNorm = 84.7147, GNorm = 0.4210, lr_0 = 1.3913e-04
Loss = 8.3131e-02, PNorm = 84.7162, GNorm = 0.5874, lr_0 = 1.3904e-04
Loss = 7.4680e-02, PNorm = 84.7185, GNorm = 0.5868, lr_0 = 1.3894e-04
Validation mae = 0.226881
Epoch 26
Loss = 6.7391e-02, PNorm = 84.7229, GNorm = 0.6178, lr_0 = 1.3884e-04
Loss = 8.8387e-02, PNorm = 84.7264, GNorm = 0.5941, lr_0 = 1.3875e-04
Loss = 7.1145e-02, PNorm = 84.7285, GNorm = 0.5098, lr_0 = 1.3865e-04
Loss = 7.8170e-02, PNorm = 84.7310, GNorm = 0.5945, lr_0 = 1.3856e-04
Loss = 9.1178e-02, PNorm = 84.7335, GNorm = 0.6141, lr_0 = 1.3846e-04
Loss = 7.2953e-02, PNorm = 84.7359, GNorm = 0.5377, lr_0 = 1.3837e-04
Loss = 8.3477e-02, PNorm = 84.7377, GNorm = 0.5871, lr_0 = 1.3828e-04
Loss = 7.9172e-02, PNorm = 84.7388, GNorm = 0.6028, lr_0 = 1.3818e-04
Loss = 8.0048e-02, PNorm = 84.7421, GNorm = 1.0555, lr_0 = 1.3809e-04
Loss = 7.3395e-02, PNorm = 84.7441, GNorm = 0.5707, lr_0 = 1.3799e-04
Loss = 8.7822e-02, PNorm = 84.7450, GNorm = 0.7643, lr_0 = 1.3790e-04
Loss = 7.1907e-02, PNorm = 84.7463, GNorm = 0.6189, lr_0 = 1.3780e-04
Loss = 6.8784e-02, PNorm = 84.7483, GNorm = 0.6894, lr_0 = 1.3771e-04
Loss = 8.4715e-02, PNorm = 84.7500, GNorm = 0.7524, lr_0 = 1.3761e-04
Loss = 7.2105e-02, PNorm = 84.7532, GNorm = 0.7531, lr_0 = 1.3752e-04
Loss = 7.5965e-02, PNorm = 84.7557, GNorm = 0.5011, lr_0 = 1.3742e-04
Loss = 7.5038e-02, PNorm = 84.7575, GNorm = 0.4925, lr_0 = 1.3733e-04
Loss = 7.5623e-02, PNorm = 84.7603, GNorm = 0.4980, lr_0 = 1.3724e-04
Loss = 7.1139e-02, PNorm = 84.7623, GNorm = 0.5974, lr_0 = 1.3714e-04
Loss = 7.5921e-02, PNorm = 84.7638, GNorm = 0.5102, lr_0 = 1.3705e-04
Loss = 9.3266e-02, PNorm = 84.7662, GNorm = 0.6720, lr_0 = 1.3695e-04
Loss = 7.5524e-02, PNorm = 84.7693, GNorm = 0.6478, lr_0 = 1.3686e-04
Loss = 7.8417e-02, PNorm = 84.7700, GNorm = 0.5933, lr_0 = 1.3677e-04
Loss = 7.7408e-02, PNorm = 84.7708, GNorm = 0.6684, lr_0 = 1.3667e-04
Loss = 8.6119e-02, PNorm = 84.7729, GNorm = 0.4764, lr_0 = 1.3658e-04
Loss = 7.9759e-02, PNorm = 84.7737, GNorm = 0.5919, lr_0 = 1.3649e-04
Loss = 7.3538e-02, PNorm = 84.7740, GNorm = 0.6483, lr_0 = 1.3639e-04
Loss = 8.5548e-02, PNorm = 84.7759, GNorm = 0.6897, lr_0 = 1.3630e-04
Loss = 7.8237e-02, PNorm = 84.7798, GNorm = 0.6452, lr_0 = 1.3621e-04
Loss = 6.3216e-02, PNorm = 84.7827, GNorm = 0.6662, lr_0 = 1.3611e-04
Loss = 8.9412e-02, PNorm = 84.7829, GNorm = 0.6236, lr_0 = 1.3602e-04
Loss = 8.4208e-02, PNorm = 84.7864, GNorm = 0.6225, lr_0 = 1.3593e-04
Loss = 8.2433e-02, PNorm = 84.7884, GNorm = 0.7595, lr_0 = 1.3583e-04
Loss = 7.9006e-02, PNorm = 84.7922, GNorm = 0.6481, lr_0 = 1.3574e-04
Loss = 7.6617e-02, PNorm = 84.7954, GNorm = 0.6223, lr_0 = 1.3565e-04
Loss = 8.1178e-02, PNorm = 84.7981, GNorm = 0.5387, lr_0 = 1.3555e-04
Loss = 7.5555e-02, PNorm = 84.8014, GNorm = 0.6193, lr_0 = 1.3546e-04
Loss = 7.9166e-02, PNorm = 84.8043, GNorm = 0.4785, lr_0 = 1.3537e-04
Loss = 8.8711e-02, PNorm = 84.8073, GNorm = 0.7009, lr_0 = 1.3528e-04
Loss = 9.2286e-02, PNorm = 84.8097, GNorm = 0.6505, lr_0 = 1.3518e-04
Loss = 7.8379e-02, PNorm = 84.8114, GNorm = 0.6547, lr_0 = 1.3509e-04
Loss = 8.2295e-02, PNorm = 84.8140, GNorm = 0.6342, lr_0 = 1.3500e-04
Loss = 6.8548e-02, PNorm = 84.8160, GNorm = 0.4422, lr_0 = 1.3491e-04
Loss = 7.8114e-02, PNorm = 84.8162, GNorm = 0.5291, lr_0 = 1.3481e-04
Loss = 7.9722e-02, PNorm = 84.8176, GNorm = 0.4792, lr_0 = 1.3472e-04
Loss = 7.7090e-02, PNorm = 84.8214, GNorm = 0.6060, lr_0 = 1.3463e-04
Loss = 7.9478e-02, PNorm = 84.8241, GNorm = 0.7310, lr_0 = 1.3454e-04
Loss = 7.6514e-02, PNorm = 84.8227, GNorm = 0.4845, lr_0 = 1.3444e-04
Loss = 7.1380e-02, PNorm = 84.8240, GNorm = 0.7209, lr_0 = 1.3435e-04
Loss = 7.0992e-02, PNorm = 84.8260, GNorm = 0.5754, lr_0 = 1.3426e-04
Loss = 7.7980e-02, PNorm = 84.8268, GNorm = 0.6179, lr_0 = 1.3417e-04
Loss = 8.7303e-02, PNorm = 84.8289, GNorm = 0.5008, lr_0 = 1.3408e-04
Loss = 8.3411e-02, PNorm = 84.8315, GNorm = 0.6990, lr_0 = 1.3398e-04
Loss = 8.1109e-02, PNorm = 84.8334, GNorm = 0.6543, lr_0 = 1.3389e-04
Loss = 8.6333e-02, PNorm = 84.8340, GNorm = 0.6454, lr_0 = 1.3380e-04
Loss = 7.7560e-02, PNorm = 84.8357, GNorm = 0.8263, lr_0 = 1.3371e-04
Loss = 7.7119e-02, PNorm = 84.8356, GNorm = 0.6147, lr_0 = 1.3362e-04
Loss = 9.1771e-02, PNorm = 84.8380, GNorm = 0.7551, lr_0 = 1.3353e-04
Loss = 8.3897e-02, PNorm = 84.8424, GNorm = 0.6684, lr_0 = 1.3343e-04
Loss = 6.6451e-02, PNorm = 84.8456, GNorm = 0.6598, lr_0 = 1.3334e-04
Loss = 8.9841e-02, PNorm = 84.8482, GNorm = 0.7005, lr_0 = 1.3325e-04
Loss = 8.0001e-02, PNorm = 84.8502, GNorm = 0.7979, lr_0 = 1.3316e-04
Loss = 7.0782e-02, PNorm = 84.8519, GNorm = 0.4610, lr_0 = 1.3307e-04
Loss = 8.1165e-02, PNorm = 84.8541, GNorm = 0.7270, lr_0 = 1.3298e-04
Loss = 9.1679e-02, PNorm = 84.8571, GNorm = 0.6375, lr_0 = 1.3289e-04
Loss = 8.1242e-02, PNorm = 84.8575, GNorm = 0.6251, lr_0 = 1.3280e-04
Loss = 6.5126e-02, PNorm = 84.8602, GNorm = 0.5000, lr_0 = 1.3270e-04
Loss = 7.7807e-02, PNorm = 84.8619, GNorm = 0.5698, lr_0 = 1.3261e-04
Loss = 7.9437e-02, PNorm = 84.8638, GNorm = 0.7115, lr_0 = 1.3252e-04
Loss = 9.0669e-02, PNorm = 84.8674, GNorm = 0.6863, lr_0 = 1.3243e-04
Loss = 7.5462e-02, PNorm = 84.8686, GNorm = 0.7613, lr_0 = 1.3234e-04
Loss = 8.1209e-02, PNorm = 84.8699, GNorm = 0.6643, lr_0 = 1.3225e-04
Loss = 8.6217e-02, PNorm = 84.8733, GNorm = 0.7742, lr_0 = 1.3216e-04
Loss = 8.5019e-02, PNorm = 84.8746, GNorm = 0.7206, lr_0 = 1.3207e-04
Loss = 7.6707e-02, PNorm = 84.8756, GNorm = 0.5587, lr_0 = 1.3198e-04
Loss = 7.0683e-02, PNorm = 84.8771, GNorm = 0.5743, lr_0 = 1.3189e-04
Loss = 8.6101e-02, PNorm = 84.8792, GNorm = 0.6329, lr_0 = 1.3180e-04
Loss = 8.6985e-02, PNorm = 84.8843, GNorm = 0.5916, lr_0 = 1.3171e-04
Loss = 8.4170e-02, PNorm = 84.8860, GNorm = 0.7850, lr_0 = 1.3162e-04
Loss = 7.9528e-02, PNorm = 84.8868, GNorm = 0.6653, lr_0 = 1.3153e-04
Loss = 7.9062e-02, PNorm = 84.8887, GNorm = 0.7490, lr_0 = 1.3144e-04
Loss = 7.6451e-02, PNorm = 84.8912, GNorm = 0.7003, lr_0 = 1.3135e-04
Loss = 7.3001e-02, PNorm = 84.8931, GNorm = 0.6991, lr_0 = 1.3126e-04
Loss = 7.2076e-02, PNorm = 84.8963, GNorm = 0.5446, lr_0 = 1.3117e-04
Loss = 7.6043e-02, PNorm = 84.8979, GNorm = 0.4730, lr_0 = 1.3108e-04
Loss = 8.2959e-02, PNorm = 84.8987, GNorm = 0.7333, lr_0 = 1.3099e-04
Loss = 7.5109e-02, PNorm = 84.9021, GNorm = 0.6685, lr_0 = 1.3090e-04
Loss = 7.4814e-02, PNorm = 84.9022, GNorm = 0.5576, lr_0 = 1.3081e-04
Loss = 8.8007e-02, PNorm = 84.9050, GNorm = 0.6804, lr_0 = 1.3072e-04
Loss = 9.7482e-02, PNorm = 84.9078, GNorm = 0.6148, lr_0 = 1.3063e-04
Loss = 8.4477e-02, PNorm = 84.9095, GNorm = 0.6019, lr_0 = 1.3054e-04
Loss = 7.6269e-02, PNorm = 84.9107, GNorm = 0.8187, lr_0 = 1.3045e-04
Loss = 7.8507e-02, PNorm = 84.9115, GNorm = 0.6725, lr_0 = 1.3036e-04
Loss = 7.9452e-02, PNorm = 84.9126, GNorm = 0.5292, lr_0 = 1.3027e-04
Loss = 7.7050e-02, PNorm = 84.9136, GNorm = 0.7123, lr_0 = 1.3018e-04
Loss = 7.7105e-02, PNorm = 84.9159, GNorm = 0.6198, lr_0 = 1.3009e-04
Loss = 7.4465e-02, PNorm = 84.9175, GNorm = 0.5131, lr_0 = 1.3000e-04
Loss = 7.7090e-02, PNorm = 84.9190, GNorm = 0.9447, lr_0 = 1.2992e-04
Loss = 8.4139e-02, PNorm = 84.9205, GNorm = 0.5564, lr_0 = 1.2983e-04
Loss = 7.2579e-02, PNorm = 84.9223, GNorm = 0.5137, lr_0 = 1.2974e-04
Loss = 7.2701e-02, PNorm = 84.9247, GNorm = 0.6404, lr_0 = 1.2965e-04
Loss = 7.2292e-02, PNorm = 84.9278, GNorm = 0.6314, lr_0 = 1.2956e-04
Loss = 7.6891e-02, PNorm = 84.9299, GNorm = 0.4894, lr_0 = 1.2947e-04
Loss = 8.0821e-02, PNorm = 84.9308, GNorm = 0.6644, lr_0 = 1.2938e-04
Loss = 7.5819e-02, PNorm = 84.9337, GNorm = 0.7735, lr_0 = 1.2929e-04
Loss = 8.3742e-02, PNorm = 84.9362, GNorm = 0.5078, lr_0 = 1.2921e-04
Loss = 7.4728e-02, PNorm = 84.9383, GNorm = 0.5404, lr_0 = 1.2912e-04
Loss = 7.6013e-02, PNorm = 84.9418, GNorm = 0.9152, lr_0 = 1.2903e-04
Loss = 7.1232e-02, PNorm = 84.9473, GNorm = 0.5865, lr_0 = 1.2894e-04
Loss = 7.6560e-02, PNorm = 84.9487, GNorm = 0.7172, lr_0 = 1.2885e-04
Loss = 7.1302e-02, PNorm = 84.9502, GNorm = 0.6119, lr_0 = 1.2876e-04
Loss = 7.8718e-02, PNorm = 84.9502, GNorm = 0.8754, lr_0 = 1.2867e-04
Loss = 7.7548e-02, PNorm = 84.9518, GNorm = 0.6442, lr_0 = 1.2859e-04
Loss = 8.5055e-02, PNorm = 84.9522, GNorm = 0.7008, lr_0 = 1.2850e-04
Loss = 7.4120e-02, PNorm = 84.9517, GNorm = 0.5767, lr_0 = 1.2841e-04
Loss = 9.2668e-02, PNorm = 84.9538, GNorm = 0.6755, lr_0 = 1.2832e-04
Loss = 9.1918e-02, PNorm = 84.9562, GNorm = 0.4814, lr_0 = 1.2823e-04
Loss = 7.6900e-02, PNorm = 84.9592, GNorm = 0.5446, lr_0 = 1.2815e-04
Loss = 7.3033e-02, PNorm = 84.9612, GNorm = 0.4389, lr_0 = 1.2806e-04
Loss = 8.2976e-02, PNorm = 84.9639, GNorm = 1.1288, lr_0 = 1.2797e-04
Validation mae = 0.230632
Epoch 27
Loss = 9.1237e-02, PNorm = 84.9637, GNorm = 0.8358, lr_0 = 1.2788e-04
Loss = 7.6659e-02, PNorm = 84.9656, GNorm = 0.6623, lr_0 = 1.2780e-04
Loss = 7.9028e-02, PNorm = 84.9693, GNorm = 0.9357, lr_0 = 1.2771e-04
Loss = 7.8339e-02, PNorm = 84.9709, GNorm = 0.6539, lr_0 = 1.2762e-04
Loss = 7.1576e-02, PNorm = 84.9709, GNorm = 0.7400, lr_0 = 1.2753e-04
Loss = 6.6536e-02, PNorm = 84.9729, GNorm = 0.8111, lr_0 = 1.2745e-04
Loss = 7.4391e-02, PNorm = 84.9743, GNorm = 0.5710, lr_0 = 1.2736e-04
Loss = 7.8281e-02, PNorm = 84.9744, GNorm = 0.5862, lr_0 = 1.2727e-04
Loss = 8.1812e-02, PNorm = 84.9762, GNorm = 0.6206, lr_0 = 1.2718e-04
Loss = 8.7350e-02, PNorm = 84.9794, GNorm = 0.8532, lr_0 = 1.2710e-04
Loss = 7.8657e-02, PNorm = 84.9832, GNorm = 0.8189, lr_0 = 1.2701e-04
Loss = 6.9131e-02, PNorm = 84.9854, GNorm = 0.4543, lr_0 = 1.2692e-04
Loss = 7.4219e-02, PNorm = 84.9855, GNorm = 0.6932, lr_0 = 1.2684e-04
Loss = 8.0064e-02, PNorm = 84.9857, GNorm = 0.6504, lr_0 = 1.2675e-04
Loss = 7.4194e-02, PNorm = 84.9878, GNorm = 0.6998, lr_0 = 1.2666e-04
Loss = 7.9856e-02, PNorm = 84.9904, GNorm = 0.7558, lr_0 = 1.2658e-04
Loss = 8.6084e-02, PNorm = 84.9941, GNorm = 0.6656, lr_0 = 1.2649e-04
Loss = 7.6067e-02, PNorm = 84.9950, GNorm = 0.6139, lr_0 = 1.2640e-04
Loss = 7.3174e-02, PNorm = 84.9963, GNorm = 0.4618, lr_0 = 1.2632e-04
Loss = 9.5057e-02, PNorm = 85.0005, GNorm = 0.7937, lr_0 = 1.2623e-04
Loss = 8.3075e-02, PNorm = 85.0029, GNorm = 0.6371, lr_0 = 1.2614e-04
Loss = 7.0449e-02, PNorm = 85.0026, GNorm = 0.6388, lr_0 = 1.2606e-04
Loss = 7.4571e-02, PNorm = 85.0040, GNorm = 0.4329, lr_0 = 1.2597e-04
Loss = 7.4994e-02, PNorm = 85.0061, GNorm = 0.6145, lr_0 = 1.2588e-04
Loss = 6.9951e-02, PNorm = 85.0067, GNorm = 0.6093, lr_0 = 1.2580e-04
Loss = 7.1025e-02, PNorm = 85.0076, GNorm = 0.4969, lr_0 = 1.2571e-04
Loss = 7.4167e-02, PNorm = 85.0108, GNorm = 0.7276, lr_0 = 1.2563e-04
Loss = 8.3427e-02, PNorm = 85.0122, GNorm = 0.7251, lr_0 = 1.2554e-04
Loss = 6.4940e-02, PNorm = 85.0143, GNorm = 0.5062, lr_0 = 1.2545e-04
Loss = 6.9030e-02, PNorm = 85.0166, GNorm = 0.6566, lr_0 = 1.2537e-04
Loss = 8.4513e-02, PNorm = 85.0197, GNorm = 0.6553, lr_0 = 1.2528e-04
Loss = 7.4327e-02, PNorm = 85.0201, GNorm = 0.6178, lr_0 = 1.2520e-04
Loss = 8.2202e-02, PNorm = 85.0226, GNorm = 0.5626, lr_0 = 1.2511e-04
Loss = 7.5381e-02, PNorm = 85.0247, GNorm = 0.9433, lr_0 = 1.2502e-04
Loss = 7.3499e-02, PNorm = 85.0282, GNorm = 0.6476, lr_0 = 1.2494e-04
Loss = 7.6653e-02, PNorm = 85.0298, GNorm = 0.6644, lr_0 = 1.2485e-04
Loss = 7.8022e-02, PNorm = 85.0321, GNorm = 0.6862, lr_0 = 1.2477e-04
Loss = 8.2042e-02, PNorm = 85.0332, GNorm = 0.6316, lr_0 = 1.2468e-04
Loss = 6.7505e-02, PNorm = 85.0349, GNorm = 0.6514, lr_0 = 1.2460e-04
Loss = 7.1448e-02, PNorm = 85.0357, GNorm = 0.5364, lr_0 = 1.2451e-04
Loss = 7.9216e-02, PNorm = 85.0363, GNorm = 0.5487, lr_0 = 1.2443e-04
Loss = 8.0868e-02, PNorm = 85.0379, GNorm = 0.5587, lr_0 = 1.2434e-04
Loss = 7.8982e-02, PNorm = 85.0396, GNorm = 0.5959, lr_0 = 1.2426e-04
Loss = 8.1277e-02, PNorm = 85.0409, GNorm = 0.9152, lr_0 = 1.2417e-04
Loss = 7.3932e-02, PNorm = 85.0428, GNorm = 0.5341, lr_0 = 1.2409e-04
Loss = 7.1381e-02, PNorm = 85.0454, GNorm = 0.5092, lr_0 = 1.2400e-04
Loss = 8.1549e-02, PNorm = 85.0470, GNorm = 0.4978, lr_0 = 1.2392e-04
Loss = 7.7487e-02, PNorm = 85.0464, GNorm = 0.5714, lr_0 = 1.2383e-04
Loss = 7.3037e-02, PNorm = 85.0488, GNorm = 0.5162, lr_0 = 1.2375e-04
Loss = 7.5820e-02, PNorm = 85.0498, GNorm = 0.6106, lr_0 = 1.2366e-04
Loss = 7.5364e-02, PNorm = 85.0497, GNorm = 0.6047, lr_0 = 1.2358e-04
Loss = 8.2829e-02, PNorm = 85.0510, GNorm = 0.7660, lr_0 = 1.2349e-04
Loss = 7.6220e-02, PNorm = 85.0539, GNorm = 0.7271, lr_0 = 1.2341e-04
Loss = 7.9619e-02, PNorm = 85.0577, GNorm = 0.4404, lr_0 = 1.2332e-04
Loss = 7.3904e-02, PNorm = 85.0615, GNorm = 0.7424, lr_0 = 1.2324e-04
Loss = 8.4602e-02, PNorm = 85.0639, GNorm = 0.6075, lr_0 = 1.2315e-04
Loss = 7.3990e-02, PNorm = 85.0668, GNorm = 0.5874, lr_0 = 1.2307e-04
Loss = 8.5526e-02, PNorm = 85.0684, GNorm = 0.6391, lr_0 = 1.2298e-04
Loss = 7.5041e-02, PNorm = 85.0685, GNorm = 0.7811, lr_0 = 1.2290e-04
Loss = 7.9170e-02, PNorm = 85.0692, GNorm = 0.6930, lr_0 = 1.2282e-04
Loss = 7.9763e-02, PNorm = 85.0704, GNorm = 0.9067, lr_0 = 1.2273e-04
Loss = 7.5606e-02, PNorm = 85.0719, GNorm = 0.7155, lr_0 = 1.2265e-04
Loss = 7.8845e-02, PNorm = 85.0729, GNorm = 0.6757, lr_0 = 1.2256e-04
Loss = 8.6667e-02, PNorm = 85.0757, GNorm = 0.8987, lr_0 = 1.2248e-04
Loss = 6.3475e-02, PNorm = 85.0770, GNorm = 0.6029, lr_0 = 1.2240e-04
Loss = 7.6462e-02, PNorm = 85.0778, GNorm = 0.7486, lr_0 = 1.2231e-04
Loss = 8.6105e-02, PNorm = 85.0805, GNorm = 0.6848, lr_0 = 1.2223e-04
Loss = 6.9540e-02, PNorm = 85.0832, GNorm = 0.7764, lr_0 = 1.2214e-04
Loss = 8.1510e-02, PNorm = 85.0838, GNorm = 0.4505, lr_0 = 1.2206e-04
Loss = 7.5478e-02, PNorm = 85.0857, GNorm = 0.6423, lr_0 = 1.2198e-04
Loss = 6.9214e-02, PNorm = 85.0864, GNorm = 0.6491, lr_0 = 1.2189e-04
Loss = 8.1658e-02, PNorm = 85.0855, GNorm = 0.5487, lr_0 = 1.2181e-04
Loss = 7.4050e-02, PNorm = 85.0852, GNorm = 0.5965, lr_0 = 1.2173e-04
Loss = 7.4343e-02, PNorm = 85.0856, GNorm = 1.1247, lr_0 = 1.2164e-04
Loss = 7.4870e-02, PNorm = 85.0868, GNorm = 0.8876, lr_0 = 1.2156e-04
Loss = 7.1782e-02, PNorm = 85.0896, GNorm = 0.5103, lr_0 = 1.2148e-04
Loss = 7.9624e-02, PNorm = 85.0926, GNorm = 0.5493, lr_0 = 1.2139e-04
Loss = 8.7084e-02, PNorm = 85.0941, GNorm = 0.7091, lr_0 = 1.2131e-04
Loss = 8.1271e-02, PNorm = 85.0943, GNorm = 0.5542, lr_0 = 1.2123e-04
Loss = 8.0199e-02, PNorm = 85.0956, GNorm = 0.5913, lr_0 = 1.2114e-04
Loss = 6.2918e-02, PNorm = 85.0974, GNorm = 0.5216, lr_0 = 1.2106e-04
Loss = 7.3782e-02, PNorm = 85.0995, GNorm = 0.5565, lr_0 = 1.2098e-04
Loss = 8.4048e-02, PNorm = 85.1032, GNorm = 0.6345, lr_0 = 1.2090e-04
Loss = 8.0874e-02, PNorm = 85.1053, GNorm = 0.5232, lr_0 = 1.2081e-04
Loss = 8.0001e-02, PNorm = 85.1054, GNorm = 0.6560, lr_0 = 1.2073e-04
Loss = 7.8834e-02, PNorm = 85.1067, GNorm = 0.6620, lr_0 = 1.2065e-04
Loss = 8.2270e-02, PNorm = 85.1094, GNorm = 0.5422, lr_0 = 1.2056e-04
Loss = 8.0838e-02, PNorm = 85.1118, GNorm = 0.8029, lr_0 = 1.2048e-04
Loss = 7.3797e-02, PNorm = 85.1146, GNorm = 0.7583, lr_0 = 1.2040e-04
Loss = 7.7945e-02, PNorm = 85.1173, GNorm = 0.6370, lr_0 = 1.2032e-04
Loss = 7.7143e-02, PNorm = 85.1188, GNorm = 0.9006, lr_0 = 1.2023e-04
Loss = 7.2488e-02, PNorm = 85.1200, GNorm = 0.4673, lr_0 = 1.2015e-04
Loss = 8.0393e-02, PNorm = 85.1208, GNorm = 0.6376, lr_0 = 1.2007e-04
Loss = 7.4698e-02, PNorm = 85.1217, GNorm = 0.5954, lr_0 = 1.1999e-04
Loss = 8.3903e-02, PNorm = 85.1246, GNorm = 0.8205, lr_0 = 1.1991e-04
Loss = 7.0627e-02, PNorm = 85.1267, GNorm = 0.8176, lr_0 = 1.1982e-04
Loss = 7.7555e-02, PNorm = 85.1269, GNorm = 0.6706, lr_0 = 1.1974e-04
Loss = 8.4790e-02, PNorm = 85.1285, GNorm = 0.7990, lr_0 = 1.1966e-04
Loss = 8.7199e-02, PNorm = 85.1306, GNorm = 0.6602, lr_0 = 1.1958e-04
Loss = 8.0529e-02, PNorm = 85.1332, GNorm = 0.7421, lr_0 = 1.1950e-04
Loss = 7.7359e-02, PNorm = 85.1366, GNorm = 0.5174, lr_0 = 1.1941e-04
Loss = 9.4730e-02, PNorm = 85.1369, GNorm = 0.9788, lr_0 = 1.1933e-04
Loss = 8.2532e-02, PNorm = 85.1383, GNorm = 0.5885, lr_0 = 1.1925e-04
Loss = 7.5172e-02, PNorm = 85.1421, GNorm = 0.6061, lr_0 = 1.1917e-04
Loss = 7.5518e-02, PNorm = 85.1429, GNorm = 0.5994, lr_0 = 1.1909e-04
Loss = 7.4727e-02, PNorm = 85.1448, GNorm = 0.6447, lr_0 = 1.1901e-04
Loss = 6.8721e-02, PNorm = 85.1482, GNorm = 0.3926, lr_0 = 1.1892e-04
Loss = 7.7926e-02, PNorm = 85.1497, GNorm = 0.6679, lr_0 = 1.1884e-04
Loss = 7.3151e-02, PNorm = 85.1497, GNorm = 0.5199, lr_0 = 1.1876e-04
Loss = 8.5906e-02, PNorm = 85.1489, GNorm = 0.4645, lr_0 = 1.1868e-04
Loss = 7.9754e-02, PNorm = 85.1501, GNorm = 0.6695, lr_0 = 1.1860e-04
Loss = 6.7056e-02, PNorm = 85.1511, GNorm = 0.5736, lr_0 = 1.1852e-04
Loss = 7.4309e-02, PNorm = 85.1523, GNorm = 0.7087, lr_0 = 1.1844e-04
Loss = 8.5785e-02, PNorm = 85.1535, GNorm = 0.8290, lr_0 = 1.1835e-04
Loss = 8.3325e-02, PNorm = 85.1550, GNorm = 0.5974, lr_0 = 1.1827e-04
Loss = 7.9863e-02, PNorm = 85.1583, GNorm = 0.7750, lr_0 = 1.1819e-04
Loss = 8.0475e-02, PNorm = 85.1608, GNorm = 0.6002, lr_0 = 1.1811e-04
Loss = 7.3448e-02, PNorm = 85.1626, GNorm = 0.5638, lr_0 = 1.1803e-04
Loss = 8.4480e-02, PNorm = 85.1642, GNorm = 0.5840, lr_0 = 1.1795e-04
Loss = 7.9521e-02, PNorm = 85.1671, GNorm = 0.5533, lr_0 = 1.1787e-04
Validation mae = 0.228702
Epoch 28
Loss = 7.1458e-02, PNorm = 85.1675, GNorm = 0.6649, lr_0 = 1.1779e-04
Loss = 7.5929e-02, PNorm = 85.1690, GNorm = 0.6901, lr_0 = 1.1771e-04
Loss = 6.8378e-02, PNorm = 85.1717, GNorm = 0.5238, lr_0 = 1.1763e-04
Loss = 6.8187e-02, PNorm = 85.1743, GNorm = 0.7207, lr_0 = 1.1755e-04
Loss = 7.5181e-02, PNorm = 85.1770, GNorm = 0.6052, lr_0 = 1.1747e-04
Loss = 8.3843e-02, PNorm = 85.1787, GNorm = 0.5507, lr_0 = 1.1739e-04
Loss = 7.6712e-02, PNorm = 85.1827, GNorm = 0.5523, lr_0 = 1.1730e-04
Loss = 6.8301e-02, PNorm = 85.1857, GNorm = 0.7193, lr_0 = 1.1722e-04
Loss = 6.7764e-02, PNorm = 85.1872, GNorm = 0.5739, lr_0 = 1.1714e-04
Loss = 7.8812e-02, PNorm = 85.1890, GNorm = 0.6401, lr_0 = 1.1706e-04
Loss = 8.1359e-02, PNorm = 85.1917, GNorm = 0.6839, lr_0 = 1.1698e-04
Loss = 7.8012e-02, PNorm = 85.1921, GNorm = 0.5083, lr_0 = 1.1690e-04
Loss = 7.6067e-02, PNorm = 85.1925, GNorm = 0.5225, lr_0 = 1.1682e-04
Loss = 8.6749e-02, PNorm = 85.1939, GNorm = 0.5833, lr_0 = 1.1674e-04
Loss = 6.6342e-02, PNorm = 85.1954, GNorm = 0.6201, lr_0 = 1.1666e-04
Loss = 6.7403e-02, PNorm = 85.1961, GNorm = 0.6649, lr_0 = 1.1658e-04
Loss = 6.2502e-02, PNorm = 85.1974, GNorm = 0.5139, lr_0 = 1.1650e-04
Loss = 6.7890e-02, PNorm = 85.1995, GNorm = 0.7201, lr_0 = 1.1642e-04
Loss = 9.0051e-02, PNorm = 85.2017, GNorm = 0.5025, lr_0 = 1.1634e-04
Loss = 7.7931e-02, PNorm = 85.2056, GNorm = 0.6239, lr_0 = 1.1626e-04
Loss = 7.7982e-02, PNorm = 85.2068, GNorm = 0.5695, lr_0 = 1.1618e-04
Loss = 8.3826e-02, PNorm = 85.2090, GNorm = 0.8738, lr_0 = 1.1611e-04
Loss = 8.3225e-02, PNorm = 85.2106, GNorm = 0.5284, lr_0 = 1.1603e-04
Loss = 6.8614e-02, PNorm = 85.2129, GNorm = 0.6957, lr_0 = 1.1595e-04
Loss = 7.5226e-02, PNorm = 85.2151, GNorm = 0.6088, lr_0 = 1.1587e-04
Loss = 7.7843e-02, PNorm = 85.2168, GNorm = 0.6249, lr_0 = 1.1579e-04
Loss = 7.5194e-02, PNorm = 85.2181, GNorm = 0.6129, lr_0 = 1.1571e-04
Loss = 6.8858e-02, PNorm = 85.2178, GNorm = 0.5606, lr_0 = 1.1563e-04
Loss = 8.6715e-02, PNorm = 85.2191, GNorm = 0.6258, lr_0 = 1.1555e-04
Loss = 8.6337e-02, PNorm = 85.2201, GNorm = 0.6598, lr_0 = 1.1547e-04
Loss = 7.4955e-02, PNorm = 85.2218, GNorm = 0.4966, lr_0 = 1.1539e-04
Loss = 8.7677e-02, PNorm = 85.2235, GNorm = 0.9277, lr_0 = 1.1531e-04
Loss = 7.8215e-02, PNorm = 85.2254, GNorm = 0.5694, lr_0 = 1.1523e-04
Loss = 6.6601e-02, PNorm = 85.2269, GNorm = 0.6076, lr_0 = 1.1515e-04
Loss = 7.4805e-02, PNorm = 85.2285, GNorm = 0.5596, lr_0 = 1.1508e-04
Loss = 8.1100e-02, PNorm = 85.2312, GNorm = 0.6840, lr_0 = 1.1500e-04
Loss = 7.3928e-02, PNorm = 85.2328, GNorm = 0.4429, lr_0 = 1.1492e-04
Loss = 7.3153e-02, PNorm = 85.2325, GNorm = 0.6057, lr_0 = 1.1484e-04
Loss = 7.5414e-02, PNorm = 85.2330, GNorm = 0.6071, lr_0 = 1.1476e-04
Loss = 8.1013e-02, PNorm = 85.2349, GNorm = 0.5450, lr_0 = 1.1468e-04
Loss = 6.9737e-02, PNorm = 85.2363, GNorm = 0.6236, lr_0 = 1.1460e-04
Loss = 7.0620e-02, PNorm = 85.2381, GNorm = 0.5304, lr_0 = 1.1452e-04
Loss = 8.5965e-02, PNorm = 85.2393, GNorm = 0.7782, lr_0 = 1.1445e-04
Loss = 7.1345e-02, PNorm = 85.2410, GNorm = 0.7749, lr_0 = 1.1437e-04
Loss = 6.6249e-02, PNorm = 85.2429, GNorm = 0.6262, lr_0 = 1.1429e-04
Loss = 6.7947e-02, PNorm = 85.2442, GNorm = 0.7351, lr_0 = 1.1421e-04
Loss = 6.3999e-02, PNorm = 85.2444, GNorm = 0.7174, lr_0 = 1.1413e-04
Loss = 7.6030e-02, PNorm = 85.2453, GNorm = 0.7734, lr_0 = 1.1405e-04
Loss = 7.4106e-02, PNorm = 85.2467, GNorm = 0.6006, lr_0 = 1.1398e-04
Loss = 6.9011e-02, PNorm = 85.2471, GNorm = 0.5530, lr_0 = 1.1390e-04
Loss = 8.4027e-02, PNorm = 85.2478, GNorm = 0.7133, lr_0 = 1.1382e-04
Loss = 6.6502e-02, PNorm = 85.2511, GNorm = 0.6643, lr_0 = 1.1374e-04
Loss = 7.2186e-02, PNorm = 85.2544, GNorm = 0.5342, lr_0 = 1.1366e-04
Loss = 7.7863e-02, PNorm = 85.2562, GNorm = 0.7126, lr_0 = 1.1359e-04
Loss = 7.0834e-02, PNorm = 85.2573, GNorm = 1.1943, lr_0 = 1.1351e-04
Loss = 8.1582e-02, PNorm = 85.2601, GNorm = 0.8951, lr_0 = 1.1343e-04
Loss = 8.2779e-02, PNorm = 85.2636, GNorm = 0.8118, lr_0 = 1.1335e-04
Loss = 7.1753e-02, PNorm = 85.2669, GNorm = 0.5508, lr_0 = 1.1328e-04
Loss = 7.3014e-02, PNorm = 85.2692, GNorm = 0.8371, lr_0 = 1.1320e-04
Loss = 7.1730e-02, PNorm = 85.2708, GNorm = 0.5992, lr_0 = 1.1312e-04
Loss = 7.5316e-02, PNorm = 85.2714, GNorm = 0.5167, lr_0 = 1.1304e-04
Loss = 8.6782e-02, PNorm = 85.2732, GNorm = 0.5437, lr_0 = 1.1297e-04
Loss = 8.0979e-02, PNorm = 85.2750, GNorm = 0.8762, lr_0 = 1.1289e-04
Loss = 7.4031e-02, PNorm = 85.2759, GNorm = 0.6044, lr_0 = 1.1281e-04
Loss = 7.5962e-02, PNorm = 85.2759, GNorm = 0.5947, lr_0 = 1.1273e-04
Loss = 7.0453e-02, PNorm = 85.2766, GNorm = 0.5259, lr_0 = 1.1266e-04
Loss = 8.4850e-02, PNorm = 85.2797, GNorm = 0.7269, lr_0 = 1.1258e-04
Loss = 7.7379e-02, PNorm = 85.2829, GNorm = 0.5423, lr_0 = 1.1250e-04
Loss = 7.5945e-02, PNorm = 85.2848, GNorm = 0.6849, lr_0 = 1.1243e-04
Loss = 6.8308e-02, PNorm = 85.2879, GNorm = 0.5870, lr_0 = 1.1235e-04
Loss = 6.9248e-02, PNorm = 85.2888, GNorm = 0.5440, lr_0 = 1.1227e-04
Loss = 8.4966e-02, PNorm = 85.2911, GNorm = 0.5093, lr_0 = 1.1219e-04
Loss = 6.9434e-02, PNorm = 85.2931, GNorm = 0.6771, lr_0 = 1.1212e-04
Loss = 7.5320e-02, PNorm = 85.2966, GNorm = 0.3654, lr_0 = 1.1204e-04
Loss = 6.9885e-02, PNorm = 85.2990, GNorm = 0.7021, lr_0 = 1.1196e-04
Loss = 6.9872e-02, PNorm = 85.3009, GNorm = 0.4808, lr_0 = 1.1189e-04
Loss = 8.6454e-02, PNorm = 85.3024, GNorm = 0.5336, lr_0 = 1.1181e-04
Loss = 8.0732e-02, PNorm = 85.3057, GNorm = 0.5640, lr_0 = 1.1173e-04
Loss = 7.2568e-02, PNorm = 85.3061, GNorm = 0.6205, lr_0 = 1.1166e-04
Loss = 7.4448e-02, PNorm = 85.3064, GNorm = 0.6364, lr_0 = 1.1158e-04
Loss = 7.1237e-02, PNorm = 85.3077, GNorm = 0.5414, lr_0 = 1.1150e-04
Loss = 8.1436e-02, PNorm = 85.3097, GNorm = 0.6640, lr_0 = 1.1143e-04
Loss = 8.3169e-02, PNorm = 85.3116, GNorm = 0.8687, lr_0 = 1.1135e-04
Loss = 6.9777e-02, PNorm = 85.3143, GNorm = 0.6225, lr_0 = 1.1128e-04
Loss = 7.1353e-02, PNorm = 85.3176, GNorm = 0.5940, lr_0 = 1.1120e-04
Loss = 7.5379e-02, PNorm = 85.3204, GNorm = 0.6994, lr_0 = 1.1112e-04
Loss = 7.2829e-02, PNorm = 85.3205, GNorm = 0.7699, lr_0 = 1.1105e-04
Loss = 7.3977e-02, PNorm = 85.3223, GNorm = 0.6064, lr_0 = 1.1097e-04
Loss = 8.0205e-02, PNorm = 85.3256, GNorm = 0.6370, lr_0 = 1.1089e-04
Loss = 7.7388e-02, PNorm = 85.3264, GNorm = 0.7523, lr_0 = 1.1082e-04
Loss = 8.1615e-02, PNorm = 85.3251, GNorm = 0.5446, lr_0 = 1.1074e-04
Loss = 6.7927e-02, PNorm = 85.3262, GNorm = 0.5412, lr_0 = 1.1067e-04
Loss = 8.4016e-02, PNorm = 85.3281, GNorm = 0.9638, lr_0 = 1.1059e-04
Loss = 7.9458e-02, PNorm = 85.3309, GNorm = 0.6312, lr_0 = 1.1052e-04
Loss = 8.1135e-02, PNorm = 85.3333, GNorm = 0.8148, lr_0 = 1.1044e-04
Loss = 7.8545e-02, PNorm = 85.3365, GNorm = 0.6910, lr_0 = 1.1036e-04
Loss = 8.0846e-02, PNorm = 85.3387, GNorm = 0.6321, lr_0 = 1.1029e-04
Loss = 8.2300e-02, PNorm = 85.3411, GNorm = 0.7619, lr_0 = 1.1021e-04
Loss = 7.5265e-02, PNorm = 85.3436, GNorm = 0.6705, lr_0 = 1.1014e-04
Loss = 7.1778e-02, PNorm = 85.3448, GNorm = 0.4693, lr_0 = 1.1006e-04
Loss = 7.8556e-02, PNorm = 85.3457, GNorm = 0.9003, lr_0 = 1.0999e-04
Loss = 8.2421e-02, PNorm = 85.3467, GNorm = 0.5368, lr_0 = 1.0991e-04
Loss = 7.6820e-02, PNorm = 85.3492, GNorm = 0.8614, lr_0 = 1.0984e-04
Loss = 8.1262e-02, PNorm = 85.3523, GNorm = 0.7574, lr_0 = 1.0976e-04
Loss = 7.1597e-02, PNorm = 85.3545, GNorm = 0.5861, lr_0 = 1.0969e-04
Loss = 7.4353e-02, PNorm = 85.3549, GNorm = 0.6315, lr_0 = 1.0961e-04
Loss = 7.1789e-02, PNorm = 85.3573, GNorm = 0.5894, lr_0 = 1.0954e-04
Loss = 8.1298e-02, PNorm = 85.3601, GNorm = 0.8140, lr_0 = 1.0946e-04
Loss = 7.3612e-02, PNorm = 85.3616, GNorm = 0.7386, lr_0 = 1.0939e-04
Loss = 7.5214e-02, PNorm = 85.3634, GNorm = 0.5924, lr_0 = 1.0931e-04
Loss = 7.7838e-02, PNorm = 85.3650, GNorm = 0.7149, lr_0 = 1.0924e-04
Loss = 8.0366e-02, PNorm = 85.3653, GNorm = 0.5258, lr_0 = 1.0916e-04
Loss = 7.5051e-02, PNorm = 85.3668, GNorm = 0.5975, lr_0 = 1.0909e-04
Loss = 7.5525e-02, PNorm = 85.3673, GNorm = 0.5118, lr_0 = 1.0901e-04
Loss = 8.6318e-02, PNorm = 85.3681, GNorm = 0.6256, lr_0 = 1.0894e-04
Loss = 7.1414e-02, PNorm = 85.3690, GNorm = 0.6309, lr_0 = 1.0886e-04
Loss = 8.4605e-02, PNorm = 85.3689, GNorm = 0.5544, lr_0 = 1.0879e-04
Loss = 7.8698e-02, PNorm = 85.3685, GNorm = 0.7349, lr_0 = 1.0871e-04
Loss = 7.0461e-02, PNorm = 85.3688, GNorm = 0.8108, lr_0 = 1.0864e-04
Loss = 7.3156e-02, PNorm = 85.3697, GNorm = 0.5471, lr_0 = 1.0856e-04
Validation mae = 0.226189
Epoch 29
Loss = 7.3418e-02, PNorm = 85.3707, GNorm = 0.6378, lr_0 = 1.0849e-04
Loss = 7.5590e-02, PNorm = 85.3728, GNorm = 0.6849, lr_0 = 1.0841e-04
Loss = 7.6351e-02, PNorm = 85.3741, GNorm = 0.6141, lr_0 = 1.0834e-04
Loss = 6.5951e-02, PNorm = 85.3754, GNorm = 0.6030, lr_0 = 1.0827e-04
Loss = 7.0016e-02, PNorm = 85.3773, GNorm = 0.6841, lr_0 = 1.0819e-04
Loss = 6.6364e-02, PNorm = 85.3795, GNorm = 0.5265, lr_0 = 1.0812e-04
Loss = 6.8486e-02, PNorm = 85.3811, GNorm = 0.6346, lr_0 = 1.0804e-04
Loss = 6.9698e-02, PNorm = 85.3828, GNorm = 0.5019, lr_0 = 1.0797e-04
Loss = 7.9639e-02, PNorm = 85.3838, GNorm = 0.8700, lr_0 = 1.0790e-04
Loss = 7.2316e-02, PNorm = 85.3857, GNorm = 0.8164, lr_0 = 1.0782e-04
Loss = 7.1169e-02, PNorm = 85.3876, GNorm = 0.5214, lr_0 = 1.0775e-04
Loss = 7.2815e-02, PNorm = 85.3906, GNorm = 0.7251, lr_0 = 1.0767e-04
Loss = 7.3459e-02, PNorm = 85.3920, GNorm = 0.5989, lr_0 = 1.0760e-04
Loss = 6.4715e-02, PNorm = 85.3940, GNorm = 0.5156, lr_0 = 1.0753e-04
Loss = 7.2826e-02, PNorm = 85.3966, GNorm = 0.5143, lr_0 = 1.0745e-04
Loss = 6.7491e-02, PNorm = 85.3983, GNorm = 0.6460, lr_0 = 1.0738e-04
Loss = 7.0515e-02, PNorm = 85.3994, GNorm = 0.4133, lr_0 = 1.0731e-04
Loss = 7.5886e-02, PNorm = 85.4015, GNorm = 0.5693, lr_0 = 1.0723e-04
Loss = 7.5461e-02, PNorm = 85.4021, GNorm = 0.8749, lr_0 = 1.0716e-04
Loss = 7.6787e-02, PNorm = 85.4028, GNorm = 0.7134, lr_0 = 1.0709e-04
Loss = 6.3822e-02, PNorm = 85.4045, GNorm = 0.5491, lr_0 = 1.0701e-04
Loss = 7.4349e-02, PNorm = 85.4056, GNorm = 0.5950, lr_0 = 1.0694e-04
Loss = 7.2772e-02, PNorm = 85.4051, GNorm = 0.6160, lr_0 = 1.0687e-04
Loss = 7.6847e-02, PNorm = 85.4069, GNorm = 0.5054, lr_0 = 1.0679e-04
Loss = 7.8693e-02, PNorm = 85.4105, GNorm = 0.7176, lr_0 = 1.0672e-04
Loss = 6.9463e-02, PNorm = 85.4117, GNorm = 0.6368, lr_0 = 1.0665e-04
Loss = 7.5392e-02, PNorm = 85.4127, GNorm = 0.7125, lr_0 = 1.0657e-04
Loss = 7.0863e-02, PNorm = 85.4140, GNorm = 0.6383, lr_0 = 1.0650e-04
Loss = 8.4109e-02, PNorm = 85.4143, GNorm = 0.7970, lr_0 = 1.0643e-04
Loss = 7.9718e-02, PNorm = 85.4157, GNorm = 0.8226, lr_0 = 1.0635e-04
Loss = 8.1032e-02, PNorm = 85.4166, GNorm = 0.7843, lr_0 = 1.0628e-04
Loss = 7.8505e-02, PNorm = 85.4169, GNorm = 0.9072, lr_0 = 1.0621e-04
Loss = 7.3958e-02, PNorm = 85.4185, GNorm = 0.6511, lr_0 = 1.0614e-04
Loss = 8.0357e-02, PNorm = 85.4207, GNorm = 0.5852, lr_0 = 1.0606e-04
Loss = 6.7249e-02, PNorm = 85.4223, GNorm = 0.4802, lr_0 = 1.0599e-04
Loss = 7.6285e-02, PNorm = 85.4241, GNorm = 0.5787, lr_0 = 1.0592e-04
Loss = 7.9748e-02, PNorm = 85.4256, GNorm = 0.6397, lr_0 = 1.0585e-04
Loss = 7.2596e-02, PNorm = 85.4282, GNorm = 0.5570, lr_0 = 1.0577e-04
Loss = 7.2677e-02, PNorm = 85.4309, GNorm = 0.6472, lr_0 = 1.0570e-04
Loss = 7.1908e-02, PNorm = 85.4324, GNorm = 0.6233, lr_0 = 1.0563e-04
Loss = 7.8184e-02, PNorm = 85.4333, GNorm = 0.7322, lr_0 = 1.0556e-04
Loss = 6.8995e-02, PNorm = 85.4348, GNorm = 0.5214, lr_0 = 1.0548e-04
Loss = 8.2609e-02, PNorm = 85.4364, GNorm = 0.6414, lr_0 = 1.0541e-04
Loss = 7.4237e-02, PNorm = 85.4386, GNorm = 0.4872, lr_0 = 1.0534e-04
Loss = 7.2292e-02, PNorm = 85.4408, GNorm = 0.6455, lr_0 = 1.0527e-04
Loss = 7.1633e-02, PNorm = 85.4410, GNorm = 0.5339, lr_0 = 1.0519e-04
Loss = 8.8691e-02, PNorm = 85.4424, GNorm = 0.6665, lr_0 = 1.0512e-04
Loss = 7.0653e-02, PNorm = 85.4442, GNorm = 0.4443, lr_0 = 1.0505e-04
Loss = 7.4171e-02, PNorm = 85.4454, GNorm = 0.5986, lr_0 = 1.0498e-04
Loss = 6.4967e-02, PNorm = 85.4456, GNorm = 0.4474, lr_0 = 1.0491e-04
Loss = 7.3778e-02, PNorm = 85.4462, GNorm = 0.5534, lr_0 = 1.0483e-04
Loss = 7.0794e-02, PNorm = 85.4467, GNorm = 0.4975, lr_0 = 1.0476e-04
Loss = 6.7822e-02, PNorm = 85.4478, GNorm = 0.7817, lr_0 = 1.0469e-04
Loss = 7.4112e-02, PNorm = 85.4482, GNorm = 0.6210, lr_0 = 1.0462e-04
Loss = 6.9636e-02, PNorm = 85.4494, GNorm = 0.6296, lr_0 = 1.0455e-04
Loss = 7.5248e-02, PNorm = 85.4510, GNorm = 0.6134, lr_0 = 1.0448e-04
Loss = 7.3512e-02, PNorm = 85.4529, GNorm = 0.4946, lr_0 = 1.0440e-04
Loss = 8.4765e-02, PNorm = 85.4538, GNorm = 0.7769, lr_0 = 1.0433e-04
Loss = 7.6490e-02, PNorm = 85.4551, GNorm = 0.6031, lr_0 = 1.0426e-04
Loss = 7.3864e-02, PNorm = 85.4569, GNorm = 0.7284, lr_0 = 1.0419e-04
Loss = 8.0369e-02, PNorm = 85.4578, GNorm = 0.7958, lr_0 = 1.0412e-04
Loss = 9.2714e-02, PNorm = 85.4584, GNorm = 0.6979, lr_0 = 1.0405e-04
Loss = 7.4921e-02, PNorm = 85.4607, GNorm = 0.7992, lr_0 = 1.0398e-04
Loss = 7.2249e-02, PNorm = 85.4640, GNorm = 0.4810, lr_0 = 1.0391e-04
Loss = 6.4121e-02, PNorm = 85.4663, GNorm = 0.5942, lr_0 = 1.0383e-04
Loss = 7.8960e-02, PNorm = 85.4676, GNorm = 0.6718, lr_0 = 1.0376e-04
Loss = 7.6147e-02, PNorm = 85.4697, GNorm = 0.7657, lr_0 = 1.0369e-04
Loss = 7.6977e-02, PNorm = 85.4720, GNorm = 0.6347, lr_0 = 1.0362e-04
Loss = 7.8046e-02, PNorm = 85.4730, GNorm = 0.6752, lr_0 = 1.0355e-04
Loss = 7.2923e-02, PNorm = 85.4745, GNorm = 0.7391, lr_0 = 1.0348e-04
Loss = 7.0273e-02, PNorm = 85.4756, GNorm = 0.4633, lr_0 = 1.0341e-04
Loss = 8.7539e-02, PNorm = 85.4767, GNorm = 0.5303, lr_0 = 1.0334e-04
Loss = 7.9310e-02, PNorm = 85.4782, GNorm = 0.6323, lr_0 = 1.0327e-04
Loss = 7.4024e-02, PNorm = 85.4808, GNorm = 0.7995, lr_0 = 1.0320e-04
Loss = 7.0696e-02, PNorm = 85.4816, GNorm = 0.5502, lr_0 = 1.0312e-04
Loss = 7.4479e-02, PNorm = 85.4826, GNorm = 0.7637, lr_0 = 1.0305e-04
Loss = 7.9274e-02, PNorm = 85.4836, GNorm = 0.7868, lr_0 = 1.0298e-04
Loss = 6.7570e-02, PNorm = 85.4851, GNorm = 0.6078, lr_0 = 1.0291e-04
Loss = 7.5919e-02, PNorm = 85.4853, GNorm = 0.5239, lr_0 = 1.0284e-04
Loss = 8.3557e-02, PNorm = 85.4863, GNorm = 0.6056, lr_0 = 1.0277e-04
Loss = 7.1363e-02, PNorm = 85.4875, GNorm = 0.7626, lr_0 = 1.0270e-04
Loss = 7.1749e-02, PNorm = 85.4892, GNorm = 0.6562, lr_0 = 1.0263e-04
Loss = 7.4901e-02, PNorm = 85.4906, GNorm = 0.5372, lr_0 = 1.0256e-04
Loss = 7.7085e-02, PNorm = 85.4912, GNorm = 0.5601, lr_0 = 1.0249e-04
Loss = 7.9605e-02, PNorm = 85.4936, GNorm = 0.4900, lr_0 = 1.0242e-04
Loss = 6.7550e-02, PNorm = 85.4965, GNorm = 0.5913, lr_0 = 1.0235e-04
Loss = 8.2599e-02, PNorm = 85.4987, GNorm = 0.5567, lr_0 = 1.0228e-04
Loss = 6.5065e-02, PNorm = 85.5008, GNorm = 0.5641, lr_0 = 1.0221e-04
Loss = 9.4721e-02, PNorm = 85.5029, GNorm = 0.7583, lr_0 = 1.0214e-04
Loss = 7.9358e-02, PNorm = 85.5055, GNorm = 0.5518, lr_0 = 1.0207e-04
Loss = 8.4770e-02, PNorm = 85.5079, GNorm = 0.9291, lr_0 = 1.0200e-04
Loss = 7.3675e-02, PNorm = 85.5083, GNorm = 0.6437, lr_0 = 1.0193e-04
Loss = 7.2171e-02, PNorm = 85.5100, GNorm = 0.5594, lr_0 = 1.0186e-04
Loss = 6.9973e-02, PNorm = 85.5103, GNorm = 0.5571, lr_0 = 1.0179e-04
Loss = 6.8644e-02, PNorm = 85.5106, GNorm = 0.6698, lr_0 = 1.0172e-04
Loss = 6.0302e-02, PNorm = 85.5113, GNorm = 0.4816, lr_0 = 1.0165e-04
Loss = 7.5730e-02, PNorm = 85.5127, GNorm = 0.4143, lr_0 = 1.0158e-04
Loss = 8.4048e-02, PNorm = 85.5145, GNorm = 0.7588, lr_0 = 1.0151e-04
Loss = 7.5346e-02, PNorm = 85.5171, GNorm = 0.6756, lr_0 = 1.0144e-04
Loss = 7.5068e-02, PNorm = 85.5187, GNorm = 0.7270, lr_0 = 1.0137e-04
Loss = 7.6559e-02, PNorm = 85.5207, GNorm = 0.6163, lr_0 = 1.0130e-04
Loss = 7.6514e-02, PNorm = 85.5229, GNorm = 0.5776, lr_0 = 1.0123e-04
Loss = 7.4792e-02, PNorm = 85.5248, GNorm = 0.7250, lr_0 = 1.0116e-04
Loss = 7.8661e-02, PNorm = 85.5259, GNorm = 0.6133, lr_0 = 1.0110e-04
Loss = 7.7786e-02, PNorm = 85.5263, GNorm = 0.7640, lr_0 = 1.0103e-04
Loss = 7.4102e-02, PNorm = 85.5275, GNorm = 0.5879, lr_0 = 1.0096e-04
Loss = 7.7721e-02, PNorm = 85.5292, GNorm = 0.8296, lr_0 = 1.0089e-04
Loss = 8.0226e-02, PNorm = 85.5316, GNorm = 0.6112, lr_0 = 1.0082e-04
Loss = 8.4611e-02, PNorm = 85.5350, GNorm = 0.5153, lr_0 = 1.0075e-04
Loss = 8.3419e-02, PNorm = 85.5373, GNorm = 0.6083, lr_0 = 1.0068e-04
Loss = 7.5045e-02, PNorm = 85.5382, GNorm = 0.6783, lr_0 = 1.0061e-04
Loss = 7.1373e-02, PNorm = 85.5395, GNorm = 0.5445, lr_0 = 1.0054e-04
Loss = 7.1564e-02, PNorm = 85.5417, GNorm = 0.6184, lr_0 = 1.0047e-04
Loss = 6.2155e-02, PNorm = 85.5434, GNorm = 0.5674, lr_0 = 1.0041e-04
Loss = 8.4425e-02, PNorm = 85.5447, GNorm = 0.6629, lr_0 = 1.0034e-04
Loss = 6.9791e-02, PNorm = 85.5444, GNorm = 0.5219, lr_0 = 1.0027e-04
Loss = 8.4020e-02, PNorm = 85.5442, GNorm = 0.6650, lr_0 = 1.0020e-04
Loss = 6.5517e-02, PNorm = 85.5456, GNorm = 0.6315, lr_0 = 1.0013e-04
Loss = 8.6487e-02, PNorm = 85.5467, GNorm = 0.6927, lr_0 = 1.0006e-04
Loss = 7.0023e-02, PNorm = 85.5472, GNorm = 0.5454, lr_0 = 1.0000e-04
Validation mae = 0.227722
Model 0 best validation mae = 0.226189 on epoch 28
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.225504
Ensemble test mae = 0.225504
Fold 2
Splitting data with seed 2
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 1.0636e+00, PNorm = 47.8617, GNorm = 3.7859, lr_0 = 1.0413e-04
Loss = 1.0597e+00, PNorm = 47.8624, GNorm = 3.7952, lr_0 = 1.0788e-04
Loss = 8.6072e-01, PNorm = 47.8639, GNorm = 2.2377, lr_0 = 1.1163e-04
Loss = 1.0163e+00, PNorm = 47.8662, GNorm = 2.3337, lr_0 = 1.1537e-04
Loss = 8.5555e-01, PNorm = 47.8692, GNorm = 5.6562, lr_0 = 1.1913e-04
Loss = 8.2907e-01, PNorm = 47.8733, GNorm = 3.1230, lr_0 = 1.2287e-04
Loss = 7.7677e-01, PNorm = 47.8783, GNorm = 5.8219, lr_0 = 1.2663e-04
Loss = 6.7102e-01, PNorm = 47.8832, GNorm = 9.5229, lr_0 = 1.3038e-04
Loss = 6.8515e-01, PNorm = 47.8898, GNorm = 10.0194, lr_0 = 1.3413e-04
Loss = 8.0830e-01, PNorm = 47.8940, GNorm = 23.8191, lr_0 = 1.3788e-04
Loss = 5.9211e-01, PNorm = 47.8984, GNorm = 10.5829, lr_0 = 1.4163e-04
Loss = 6.0803e-01, PNorm = 47.9037, GNorm = 4.8790, lr_0 = 1.4537e-04
Loss = 6.0286e-01, PNorm = 47.9085, GNorm = 6.1778, lr_0 = 1.4913e-04
Loss = 6.6731e-01, PNorm = 47.9140, GNorm = 7.6831, lr_0 = 1.5288e-04
Loss = 4.9878e-01, PNorm = 47.9207, GNorm = 10.9655, lr_0 = 1.5662e-04
Loss = 4.7302e-01, PNorm = 47.9272, GNorm = 9.4481, lr_0 = 1.6038e-04
Loss = 5.5448e-01, PNorm = 47.9317, GNorm = 8.0615, lr_0 = 1.6412e-04
Loss = 5.5014e-01, PNorm = 47.9381, GNorm = 2.2242, lr_0 = 1.6788e-04
Loss = 5.3398e-01, PNorm = 47.9434, GNorm = 21.7724, lr_0 = 1.7163e-04
Loss = 4.7334e-01, PNorm = 47.9480, GNorm = 10.5870, lr_0 = 1.7538e-04
Loss = 5.0634e-01, PNorm = 47.9549, GNorm = 4.5562, lr_0 = 1.7913e-04
Loss = 5.1587e-01, PNorm = 47.9619, GNorm = 1.9382, lr_0 = 1.8288e-04
Loss = 4.3520e-01, PNorm = 47.9681, GNorm = 1.5851, lr_0 = 1.8662e-04
Loss = 4.2144e-01, PNorm = 47.9747, GNorm = 1.9824, lr_0 = 1.9038e-04
Loss = 3.9703e-01, PNorm = 47.9805, GNorm = 14.7453, lr_0 = 1.9413e-04
Loss = 3.9329e-01, PNorm = 47.9853, GNorm = 12.1555, lr_0 = 1.9788e-04
Loss = 4.0040e-01, PNorm = 47.9908, GNorm = 5.1274, lr_0 = 2.0163e-04
Loss = 3.8901e-01, PNorm = 47.9966, GNorm = 13.4249, lr_0 = 2.0537e-04
Loss = 4.7060e-01, PNorm = 48.0006, GNorm = 28.1243, lr_0 = 2.0913e-04
Loss = 4.7116e-01, PNorm = 48.0066, GNorm = 2.7093, lr_0 = 2.1288e-04
Loss = 4.0139e-01, PNorm = 48.0141, GNorm = 10.1873, lr_0 = 2.1663e-04
Loss = 3.9402e-01, PNorm = 48.0216, GNorm = 10.2777, lr_0 = 2.2038e-04
Loss = 4.4719e-01, PNorm = 48.0276, GNorm = 9.8129, lr_0 = 2.2412e-04
Loss = 3.6479e-01, PNorm = 48.0336, GNorm = 2.2362, lr_0 = 2.2787e-04
Loss = 3.8482e-01, PNorm = 48.0395, GNorm = 3.2596, lr_0 = 2.3163e-04
Loss = 3.9450e-01, PNorm = 48.0465, GNorm = 2.1583, lr_0 = 2.3538e-04
Loss = 3.3571e-01, PNorm = 48.0542, GNorm = 9.0970, lr_0 = 2.3913e-04
Loss = 2.7501e-01, PNorm = 48.0596, GNorm = 7.0146, lr_0 = 2.4288e-04
Loss = 3.5981e-01, PNorm = 48.0649, GNorm = 9.1374, lr_0 = 2.4662e-04
Loss = 3.4783e-01, PNorm = 48.0723, GNorm = 4.6413, lr_0 = 2.5038e-04
Loss = 3.3720e-01, PNorm = 48.0785, GNorm = 2.7205, lr_0 = 2.5413e-04
Loss = 2.9723e-01, PNorm = 48.0880, GNorm = 3.5261, lr_0 = 2.5788e-04
Loss = 3.0718e-01, PNorm = 48.0927, GNorm = 5.0429, lr_0 = 2.6163e-04
Loss = 3.1546e-01, PNorm = 48.0995, GNorm = 8.5152, lr_0 = 2.6537e-04
Loss = 3.1963e-01, PNorm = 48.1074, GNorm = 17.0289, lr_0 = 2.6912e-04
Loss = 3.2586e-01, PNorm = 48.1104, GNorm = 10.0492, lr_0 = 2.7288e-04
Loss = 3.3099e-01, PNorm = 48.1166, GNorm = 3.3474, lr_0 = 2.7663e-04
Loss = 3.2774e-01, PNorm = 48.1215, GNorm = 2.9829, lr_0 = 2.8038e-04
Loss = 3.5775e-01, PNorm = 48.1256, GNorm = 2.9075, lr_0 = 2.8413e-04
Loss = 4.1595e-01, PNorm = 48.1335, GNorm = 17.5787, lr_0 = 2.8787e-04
Loss = 3.3511e-01, PNorm = 48.1431, GNorm = 8.7892, lr_0 = 2.9163e-04
Loss = 3.0397e-01, PNorm = 48.1518, GNorm = 1.7925, lr_0 = 2.9538e-04
Loss = 3.1733e-01, PNorm = 48.1600, GNorm = 2.7262, lr_0 = 2.9913e-04
Loss = 3.3273e-01, PNorm = 48.1687, GNorm = 9.7365, lr_0 = 3.0288e-04
Loss = 3.1497e-01, PNorm = 48.1763, GNorm = 3.7996, lr_0 = 3.0662e-04
Loss = 3.0244e-01, PNorm = 48.1831, GNorm = 14.5866, lr_0 = 3.1037e-04
Loss = 3.6451e-01, PNorm = 48.1875, GNorm = 5.6332, lr_0 = 3.1413e-04
Loss = 3.3687e-01, PNorm = 48.1976, GNorm = 2.6190, lr_0 = 3.1788e-04
Loss = 3.4388e-01, PNorm = 48.2045, GNorm = 6.8789, lr_0 = 3.2163e-04
Loss = 3.3715e-01, PNorm = 48.2130, GNorm = 7.6959, lr_0 = 3.2538e-04
Loss = 3.3552e-01, PNorm = 48.2200, GNorm = 9.2844, lr_0 = 3.2912e-04
Loss = 3.2982e-01, PNorm = 48.2289, GNorm = 3.8403, lr_0 = 3.3288e-04
Loss = 2.7970e-01, PNorm = 48.2383, GNorm = 12.2590, lr_0 = 3.3663e-04
Loss = 2.7500e-01, PNorm = 48.2462, GNorm = 1.7417, lr_0 = 3.4038e-04
Loss = 3.1242e-01, PNorm = 48.2561, GNorm = 10.9362, lr_0 = 3.4413e-04
Loss = 4.1182e-01, PNorm = 48.2628, GNorm = 7.4130, lr_0 = 3.4787e-04
Loss = 3.2859e-01, PNorm = 48.2754, GNorm = 9.2447, lr_0 = 3.5162e-04
Loss = 3.1109e-01, PNorm = 48.2852, GNorm = 3.1687, lr_0 = 3.5538e-04
Loss = 3.4201e-01, PNorm = 48.2955, GNorm = 6.3868, lr_0 = 3.5913e-04
Loss = 3.0820e-01, PNorm = 48.3038, GNorm = 5.8048, lr_0 = 3.6288e-04
Loss = 3.3927e-01, PNorm = 48.3116, GNorm = 5.3207, lr_0 = 3.6662e-04
Loss = 2.9720e-01, PNorm = 48.3221, GNorm = 2.6639, lr_0 = 3.7037e-04
Loss = 3.4609e-01, PNorm = 48.3300, GNorm = 2.6438, lr_0 = 3.7413e-04
Loss = 3.3061e-01, PNorm = 48.3376, GNorm = 2.5594, lr_0 = 3.7788e-04
Loss = 3.0395e-01, PNorm = 48.3469, GNorm = 8.1647, lr_0 = 3.8163e-04
Loss = 3.0528e-01, PNorm = 48.3543, GNorm = 2.0196, lr_0 = 3.8537e-04
Loss = 3.2657e-01, PNorm = 48.3603, GNorm = 5.4120, lr_0 = 3.8912e-04
Loss = 2.7504e-01, PNorm = 48.3683, GNorm = 6.0097, lr_0 = 3.9287e-04
Loss = 3.4237e-01, PNorm = 48.3737, GNorm = 4.6335, lr_0 = 3.9663e-04
Loss = 2.9780e-01, PNorm = 48.3824, GNorm = 11.1958, lr_0 = 4.0038e-04
Loss = 3.2079e-01, PNorm = 48.3872, GNorm = 8.6878, lr_0 = 4.0413e-04
Loss = 2.8526e-01, PNorm = 48.3978, GNorm = 5.8186, lr_0 = 4.0787e-04
Loss = 3.0362e-01, PNorm = 48.4066, GNorm = 11.1768, lr_0 = 4.1162e-04
Loss = 3.1724e-01, PNorm = 48.4155, GNorm = 14.1802, lr_0 = 4.1537e-04
Loss = 3.5054e-01, PNorm = 48.4264, GNorm = 6.7456, lr_0 = 4.1913e-04
Loss = 3.2431e-01, PNorm = 48.4401, GNorm = 1.4952, lr_0 = 4.2288e-04
Loss = 3.0582e-01, PNorm = 48.4490, GNorm = 2.3788, lr_0 = 4.2662e-04
Loss = 3.1232e-01, PNorm = 48.4589, GNorm = 1.5908, lr_0 = 4.3037e-04
Loss = 3.6404e-01, PNorm = 48.4696, GNorm = 3.8074, lr_0 = 4.3412e-04
Loss = 2.9818e-01, PNorm = 48.4853, GNorm = 4.0140, lr_0 = 4.3788e-04
Loss = 3.5844e-01, PNorm = 48.4963, GNorm = 14.3715, lr_0 = 4.4163e-04
Loss = 3.1701e-01, PNorm = 48.5091, GNorm = 7.2918, lr_0 = 4.4538e-04
Loss = 2.9126e-01, PNorm = 48.5210, GNorm = 1.0680, lr_0 = 4.4912e-04
Loss = 3.0557e-01, PNorm = 48.5332, GNorm = 1.9002, lr_0 = 4.5287e-04
Loss = 3.0146e-01, PNorm = 48.5388, GNorm = 5.9193, lr_0 = 4.5662e-04
Loss = 2.6891e-01, PNorm = 48.5464, GNorm = 6.9192, lr_0 = 4.6038e-04
Loss = 3.0772e-01, PNorm = 48.5585, GNorm = 8.6310, lr_0 = 4.6413e-04
Loss = 3.2131e-01, PNorm = 48.5682, GNorm = 2.5479, lr_0 = 4.6787e-04
Loss = 3.3180e-01, PNorm = 48.5812, GNorm = 4.0513, lr_0 = 4.7162e-04
Loss = 3.3907e-01, PNorm = 48.5940, GNorm = 14.0623, lr_0 = 4.7537e-04
Loss = 3.7122e-01, PNorm = 48.6023, GNorm = 1.2087, lr_0 = 4.7913e-04
Loss = 2.7641e-01, PNorm = 48.6175, GNorm = 3.8027, lr_0 = 4.8288e-04
Loss = 3.1615e-01, PNorm = 48.6257, GNorm = 1.3599, lr_0 = 4.8663e-04
Loss = 3.0261e-01, PNorm = 48.6334, GNorm = 3.1293, lr_0 = 4.9038e-04
Loss = 2.6764e-01, PNorm = 48.6419, GNorm = 1.8244, lr_0 = 4.9412e-04
Loss = 2.6104e-01, PNorm = 48.6542, GNorm = 3.0374, lr_0 = 4.9788e-04
Loss = 3.0285e-01, PNorm = 48.6560, GNorm = 2.8225, lr_0 = 5.0163e-04
Loss = 3.3296e-01, PNorm = 48.6671, GNorm = 3.4121, lr_0 = 5.0538e-04
Loss = 3.5254e-01, PNorm = 48.6816, GNorm = 0.9323, lr_0 = 5.0913e-04
Loss = 2.7100e-01, PNorm = 48.6937, GNorm = 2.8621, lr_0 = 5.1287e-04
Loss = 3.3561e-01, PNorm = 48.7063, GNorm = 1.0927, lr_0 = 5.1663e-04
Loss = 3.0480e-01, PNorm = 48.7201, GNorm = 1.3027, lr_0 = 5.2038e-04
Loss = 2.8432e-01, PNorm = 48.7352, GNorm = 4.5238, lr_0 = 5.2413e-04
Loss = 2.8847e-01, PNorm = 48.7458, GNorm = 1.4475, lr_0 = 5.2788e-04
Loss = 2.9822e-01, PNorm = 48.7529, GNorm = 2.3730, lr_0 = 5.3162e-04
Loss = 3.1607e-01, PNorm = 48.7640, GNorm = 3.0412, lr_0 = 5.3538e-04
Loss = 2.5590e-01, PNorm = 48.7758, GNorm = 0.9855, lr_0 = 5.3912e-04
Loss = 2.7473e-01, PNorm = 48.7876, GNorm = 1.2544, lr_0 = 5.4288e-04
Loss = 2.5555e-01, PNorm = 48.7925, GNorm = 2.2566, lr_0 = 5.4663e-04
Loss = 2.9202e-01, PNorm = 48.8014, GNorm = 3.9428, lr_0 = 5.5038e-04
Validation mae = 0.316908
Epoch 1
Loss = 2.6416e-01, PNorm = 48.8094, GNorm = 4.4116, lr_0 = 5.5413e-04
Loss = 2.6946e-01, PNorm = 48.8232, GNorm = 1.8215, lr_0 = 5.5787e-04
Loss = 2.8225e-01, PNorm = 48.8321, GNorm = 1.3174, lr_0 = 5.6163e-04
Loss = 2.4009e-01, PNorm = 48.8429, GNorm = 1.5568, lr_0 = 5.6538e-04
Loss = 2.6025e-01, PNorm = 48.8500, GNorm = 7.8835, lr_0 = 5.6913e-04
Loss = 2.6990e-01, PNorm = 48.8646, GNorm = 1.4384, lr_0 = 5.7288e-04
Loss = 2.7832e-01, PNorm = 48.8709, GNorm = 1.1526, lr_0 = 5.7662e-04
Loss = 2.4313e-01, PNorm = 48.8841, GNorm = 1.5101, lr_0 = 5.8038e-04
Loss = 2.4411e-01, PNorm = 48.8933, GNorm = 3.9780, lr_0 = 5.8413e-04
Loss = 2.8238e-01, PNorm = 48.9010, GNorm = 1.5724, lr_0 = 5.8788e-04
Loss = 2.4969e-01, PNorm = 48.9126, GNorm = 1.0436, lr_0 = 5.9163e-04
Loss = 2.5470e-01, PNorm = 48.9271, GNorm = 3.2589, lr_0 = 5.9538e-04
Loss = 3.2846e-01, PNorm = 48.9385, GNorm = 1.3338, lr_0 = 5.9913e-04
Loss = 2.7259e-01, PNorm = 48.9606, GNorm = 2.2791, lr_0 = 6.0288e-04
Loss = 2.5082e-01, PNorm = 48.9780, GNorm = 3.9293, lr_0 = 6.0663e-04
Loss = 2.6531e-01, PNorm = 48.9940, GNorm = 4.5842, lr_0 = 6.1038e-04
Loss = 3.2435e-01, PNorm = 49.0057, GNorm = 6.4893, lr_0 = 6.1413e-04
Loss = 3.0660e-01, PNorm = 49.0221, GNorm = 6.0981, lr_0 = 6.1788e-04
Loss = 2.6726e-01, PNorm = 49.0382, GNorm = 1.1140, lr_0 = 6.2163e-04
Loss = 2.8227e-01, PNorm = 49.0548, GNorm = 6.0720, lr_0 = 6.2538e-04
Loss = 2.3694e-01, PNorm = 49.0649, GNorm = 4.3165, lr_0 = 6.2913e-04
Loss = 3.2742e-01, PNorm = 49.0801, GNorm = 9.3119, lr_0 = 6.3288e-04
Loss = 3.4153e-01, PNorm = 49.0948, GNorm = 6.0006, lr_0 = 6.3663e-04
Loss = 2.7656e-01, PNorm = 49.1135, GNorm = 2.9309, lr_0 = 6.4038e-04
Loss = 2.7080e-01, PNorm = 49.1321, GNorm = 3.3006, lr_0 = 6.4413e-04
Loss = 2.6300e-01, PNorm = 49.1454, GNorm = 6.6009, lr_0 = 6.4788e-04
Loss = 2.6081e-01, PNorm = 49.1631, GNorm = 2.3281, lr_0 = 6.5163e-04
Loss = 2.4632e-01, PNorm = 49.1797, GNorm = 3.8233, lr_0 = 6.5538e-04
Loss = 2.6146e-01, PNorm = 49.1912, GNorm = 1.9781, lr_0 = 6.5913e-04
Loss = 2.4580e-01, PNorm = 49.2029, GNorm = 3.9817, lr_0 = 6.6288e-04
Loss = 2.7999e-01, PNorm = 49.2118, GNorm = 2.1607, lr_0 = 6.6663e-04
Loss = 2.7042e-01, PNorm = 49.2245, GNorm = 3.2035, lr_0 = 6.7038e-04
Loss = 2.6654e-01, PNorm = 49.2423, GNorm = 1.9328, lr_0 = 6.7413e-04
Loss = 2.4650e-01, PNorm = 49.2600, GNorm = 1.1596, lr_0 = 6.7788e-04
Loss = 2.9208e-01, PNorm = 49.2707, GNorm = 1.6364, lr_0 = 6.8163e-04
Loss = 3.4255e-01, PNorm = 49.2923, GNorm = 0.8713, lr_0 = 6.8538e-04
Loss = 2.7303e-01, PNorm = 49.3095, GNorm = 3.4815, lr_0 = 6.8913e-04
Loss = 2.8990e-01, PNorm = 49.3295, GNorm = 5.7658, lr_0 = 6.9288e-04
Loss = 3.3442e-01, PNorm = 49.3522, GNorm = 2.8586, lr_0 = 6.9663e-04
Loss = 2.8597e-01, PNorm = 49.3739, GNorm = 1.3117, lr_0 = 7.0038e-04
Loss = 3.0652e-01, PNorm = 49.3931, GNorm = 2.9787, lr_0 = 7.0413e-04
Loss = 2.8609e-01, PNorm = 49.4143, GNorm = 4.3015, lr_0 = 7.0788e-04
Loss = 3.2009e-01, PNorm = 49.4358, GNorm = 1.3352, lr_0 = 7.1163e-04
Loss = 2.4573e-01, PNorm = 49.4540, GNorm = 1.9273, lr_0 = 7.1538e-04
Loss = 3.0023e-01, PNorm = 49.4678, GNorm = 1.6683, lr_0 = 7.1913e-04
Loss = 2.8534e-01, PNorm = 49.4783, GNorm = 2.2956, lr_0 = 7.2288e-04
Loss = 2.6129e-01, PNorm = 49.4971, GNorm = 1.8821, lr_0 = 7.2663e-04
Loss = 2.5619e-01, PNorm = 49.5122, GNorm = 1.2928, lr_0 = 7.3038e-04
Loss = 2.2904e-01, PNorm = 49.5259, GNorm = 2.3311, lr_0 = 7.3413e-04
Loss = 2.3530e-01, PNorm = 49.5391, GNorm = 3.1087, lr_0 = 7.3788e-04
Loss = 2.1044e-01, PNorm = 49.5499, GNorm = 1.5053, lr_0 = 7.4163e-04
Loss = 2.3633e-01, PNorm = 49.5687, GNorm = 0.7909, lr_0 = 7.4538e-04
Loss = 2.1195e-01, PNorm = 49.5834, GNorm = 1.0548, lr_0 = 7.4913e-04
Loss = 2.7294e-01, PNorm = 49.5966, GNorm = 1.0154, lr_0 = 7.5288e-04
Loss = 2.5478e-01, PNorm = 49.6068, GNorm = 2.1466, lr_0 = 7.5663e-04
Loss = 2.5670e-01, PNorm = 49.6257, GNorm = 1.2140, lr_0 = 7.6038e-04
Loss = 2.8729e-01, PNorm = 49.6443, GNorm = 3.4993, lr_0 = 7.6413e-04
Loss = 2.3883e-01, PNorm = 49.6586, GNorm = 2.1254, lr_0 = 7.6788e-04
Loss = 2.4035e-01, PNorm = 49.6756, GNorm = 1.7888, lr_0 = 7.7163e-04
Loss = 2.5737e-01, PNorm = 49.6959, GNorm = 6.8802, lr_0 = 7.7538e-04
Loss = 2.4596e-01, PNorm = 49.7098, GNorm = 1.8413, lr_0 = 7.7913e-04
Loss = 2.8677e-01, PNorm = 49.7354, GNorm = 1.0430, lr_0 = 7.8288e-04
Loss = 2.2572e-01, PNorm = 49.7551, GNorm = 2.0307, lr_0 = 7.8663e-04
Loss = 2.6885e-01, PNorm = 49.7702, GNorm = 2.4733, lr_0 = 7.9038e-04
Loss = 2.5824e-01, PNorm = 49.7832, GNorm = 5.2067, lr_0 = 7.9413e-04
Loss = 2.6031e-01, PNorm = 49.8061, GNorm = 2.8769, lr_0 = 7.9788e-04
Loss = 2.5152e-01, PNorm = 49.8255, GNorm = 3.5770, lr_0 = 8.0163e-04
Loss = 2.5674e-01, PNorm = 49.8426, GNorm = 3.2110, lr_0 = 8.0538e-04
Loss = 2.9484e-01, PNorm = 49.8552, GNorm = 1.5505, lr_0 = 8.0913e-04
Loss = 2.4028e-01, PNorm = 49.8713, GNorm = 1.8075, lr_0 = 8.1288e-04
Loss = 2.7287e-01, PNorm = 49.8956, GNorm = 2.3406, lr_0 = 8.1663e-04
Loss = 2.6276e-01, PNorm = 49.9207, GNorm = 2.7466, lr_0 = 8.2038e-04
Loss = 2.4385e-01, PNorm = 49.9425, GNorm = 2.5161, lr_0 = 8.2413e-04
Loss = 2.4459e-01, PNorm = 49.9594, GNorm = 1.1942, lr_0 = 8.2788e-04
Loss = 2.2615e-01, PNorm = 49.9782, GNorm = 0.9035, lr_0 = 8.3163e-04
Loss = 2.3878e-01, PNorm = 49.9974, GNorm = 4.1136, lr_0 = 8.3538e-04
Loss = 2.5848e-01, PNorm = 50.0086, GNorm = 3.4272, lr_0 = 8.3913e-04
Loss = 2.8833e-01, PNorm = 50.0244, GNorm = 3.9506, lr_0 = 8.4288e-04
Loss = 2.4996e-01, PNorm = 50.0611, GNorm = 2.0146, lr_0 = 8.4663e-04
Loss = 2.2080e-01, PNorm = 50.0799, GNorm = 2.7273, lr_0 = 8.5038e-04
Loss = 2.6317e-01, PNorm = 50.0996, GNorm = 5.3424, lr_0 = 8.5413e-04
Loss = 2.5186e-01, PNorm = 50.1201, GNorm = 3.6205, lr_0 = 8.5788e-04
Loss = 2.5335e-01, PNorm = 50.1338, GNorm = 1.6324, lr_0 = 8.6163e-04
Loss = 2.6570e-01, PNorm = 50.1554, GNorm = 1.0020, lr_0 = 8.6538e-04
Loss = 2.3926e-01, PNorm = 50.1803, GNorm = 2.8437, lr_0 = 8.6913e-04
Loss = 2.8383e-01, PNorm = 50.2125, GNorm = 4.1529, lr_0 = 8.7288e-04
Loss = 2.4817e-01, PNorm = 50.2298, GNorm = 4.5134, lr_0 = 8.7663e-04
Loss = 2.8728e-01, PNorm = 50.2561, GNorm = 4.2112, lr_0 = 8.8038e-04
Loss = 2.1885e-01, PNorm = 50.2782, GNorm = 1.1721, lr_0 = 8.8413e-04
Loss = 2.6007e-01, PNorm = 50.3022, GNorm = 2.0673, lr_0 = 8.8788e-04
Loss = 2.7734e-01, PNorm = 50.3301, GNorm = 3.7727, lr_0 = 8.9163e-04
Loss = 3.0335e-01, PNorm = 50.3547, GNorm = 1.8914, lr_0 = 8.9538e-04
Loss = 2.7567e-01, PNorm = 50.3875, GNorm = 1.0434, lr_0 = 8.9913e-04
Loss = 2.5926e-01, PNorm = 50.4110, GNorm = 4.8064, lr_0 = 9.0288e-04
Loss = 2.7627e-01, PNorm = 50.4386, GNorm = 4.0235, lr_0 = 9.0663e-04
Loss = 2.7362e-01, PNorm = 50.4751, GNorm = 2.5358, lr_0 = 9.1038e-04
Loss = 2.2034e-01, PNorm = 50.4879, GNorm = 6.2480, lr_0 = 9.1413e-04
Loss = 2.6983e-01, PNorm = 50.5077, GNorm = 3.2708, lr_0 = 9.1788e-04
Loss = 2.3018e-01, PNorm = 50.5311, GNorm = 0.7817, lr_0 = 9.2163e-04
Loss = 2.2952e-01, PNorm = 50.5494, GNorm = 1.7414, lr_0 = 9.2538e-04
Loss = 2.6065e-01, PNorm = 50.5647, GNorm = 3.4983, lr_0 = 9.2913e-04
Loss = 2.7350e-01, PNorm = 50.5982, GNorm = 4.0733, lr_0 = 9.3288e-04
Loss = 2.4000e-01, PNorm = 50.6341, GNorm = 3.4345, lr_0 = 9.3663e-04
Loss = 2.6885e-01, PNorm = 50.6604, GNorm = 1.2888, lr_0 = 9.4038e-04
Loss = 2.6461e-01, PNorm = 50.6792, GNorm = 0.9253, lr_0 = 9.4413e-04
Loss = 2.5133e-01, PNorm = 50.7038, GNorm = 5.9862, lr_0 = 9.4788e-04
Loss = 3.0699e-01, PNorm = 50.7317, GNorm = 4.5219, lr_0 = 9.5163e-04
Loss = 2.6015e-01, PNorm = 50.7769, GNorm = 2.8172, lr_0 = 9.5538e-04
Loss = 2.9537e-01, PNorm = 50.8100, GNorm = 0.9210, lr_0 = 9.5913e-04
Loss = 2.4549e-01, PNorm = 50.8416, GNorm = 1.5717, lr_0 = 9.6288e-04
Loss = 2.6042e-01, PNorm = 50.8624, GNorm = 4.2014, lr_0 = 9.6663e-04
Loss = 2.6172e-01, PNorm = 50.8789, GNorm = 3.8920, lr_0 = 9.7038e-04
Loss = 2.3933e-01, PNorm = 50.9128, GNorm = 2.8772, lr_0 = 9.7413e-04
Loss = 2.5243e-01, PNorm = 50.9374, GNorm = 1.2303, lr_0 = 9.7788e-04
Loss = 2.1361e-01, PNorm = 50.9615, GNorm = 2.3241, lr_0 = 9.8163e-04
Loss = 2.8617e-01, PNorm = 50.9982, GNorm = 4.7319, lr_0 = 9.8537e-04
Loss = 2.6026e-01, PNorm = 51.0239, GNorm = 0.9835, lr_0 = 9.8912e-04
Loss = 2.6078e-01, PNorm = 51.0432, GNorm = 2.7610, lr_0 = 9.9288e-04
Loss = 2.7064e-01, PNorm = 51.0714, GNorm = 0.8250, lr_0 = 9.9663e-04
Loss = 2.5063e-01, PNorm = 51.0940, GNorm = 3.5159, lr_0 = 9.9993e-04
Validation mae = 0.289449
Epoch 2
Loss = 2.3261e-01, PNorm = 51.1224, GNorm = 1.0668, lr_0 = 9.9925e-04
Loss = 2.6024e-01, PNorm = 51.1449, GNorm = 1.0580, lr_0 = 9.9856e-04
Loss = 2.1361e-01, PNorm = 51.1731, GNorm = 1.4969, lr_0 = 9.9788e-04
Loss = 2.6722e-01, PNorm = 51.1945, GNorm = 4.0831, lr_0 = 9.9719e-04
Loss = 2.7096e-01, PNorm = 51.2203, GNorm = 3.4508, lr_0 = 9.9651e-04
Loss = 2.2682e-01, PNorm = 51.2583, GNorm = 0.8226, lr_0 = 9.9583e-04
Loss = 2.0596e-01, PNorm = 51.2892, GNorm = 2.6798, lr_0 = 9.9515e-04
Loss = 2.3885e-01, PNorm = 51.3050, GNorm = 1.1644, lr_0 = 9.9446e-04
Loss = 2.3127e-01, PNorm = 51.3353, GNorm = 1.4964, lr_0 = 9.9378e-04
Loss = 2.3873e-01, PNorm = 51.3609, GNorm = 2.4068, lr_0 = 9.9310e-04
Loss = 2.4455e-01, PNorm = 51.3790, GNorm = 1.3776, lr_0 = 9.9242e-04
Loss = 2.5115e-01, PNorm = 51.4114, GNorm = 1.1433, lr_0 = 9.9174e-04
Loss = 2.6324e-01, PNorm = 51.4393, GNorm = 1.1661, lr_0 = 9.9106e-04
Loss = 2.3895e-01, PNorm = 51.4625, GNorm = 1.8477, lr_0 = 9.9038e-04
Loss = 2.8092e-01, PNorm = 51.4971, GNorm = 4.8911, lr_0 = 9.8971e-04
Loss = 2.9204e-01, PNorm = 51.5287, GNorm = 0.7495, lr_0 = 9.8903e-04
Loss = 2.5160e-01, PNorm = 51.5664, GNorm = 2.9926, lr_0 = 9.8835e-04
Loss = 2.1150e-01, PNorm = 51.5864, GNorm = 0.8292, lr_0 = 9.8767e-04
Loss = 2.3316e-01, PNorm = 51.6136, GNorm = 1.1146, lr_0 = 9.8700e-04
Loss = 2.6084e-01, PNorm = 51.6352, GNorm = 2.1634, lr_0 = 9.8632e-04
Loss = 2.7164e-01, PNorm = 51.6681, GNorm = 2.7085, lr_0 = 9.8564e-04
Loss = 2.5345e-01, PNorm = 51.6915, GNorm = 1.6644, lr_0 = 9.8497e-04
Loss = 2.4562e-01, PNorm = 51.7224, GNorm = 1.5406, lr_0 = 9.8429e-04
Loss = 2.1818e-01, PNorm = 51.7528, GNorm = 2.0086, lr_0 = 9.8362e-04
Loss = 2.3510e-01, PNorm = 51.7744, GNorm = 3.1723, lr_0 = 9.8295e-04
Loss = 2.4752e-01, PNorm = 51.8049, GNorm = 2.5400, lr_0 = 9.8227e-04
Loss = 2.1973e-01, PNorm = 51.8246, GNorm = 2.1611, lr_0 = 9.8160e-04
Loss = 2.2698e-01, PNorm = 51.8509, GNorm = 2.7812, lr_0 = 9.8093e-04
Loss = 2.1769e-01, PNorm = 51.8679, GNorm = 3.2770, lr_0 = 9.8026e-04
Loss = 2.6339e-01, PNorm = 51.8903, GNorm = 1.2787, lr_0 = 9.7958e-04
Loss = 2.2662e-01, PNorm = 51.9076, GNorm = 1.4566, lr_0 = 9.7891e-04
Loss = 2.1033e-01, PNorm = 51.9338, GNorm = 0.7919, lr_0 = 9.7824e-04
Loss = 2.2442e-01, PNorm = 51.9453, GNorm = 1.0566, lr_0 = 9.7757e-04
Loss = 2.0245e-01, PNorm = 51.9713, GNorm = 1.4102, lr_0 = 9.7690e-04
Loss = 2.5672e-01, PNorm = 51.9924, GNorm = 2.7512, lr_0 = 9.7623e-04
Loss = 2.4767e-01, PNorm = 52.0294, GNorm = 0.9085, lr_0 = 9.7556e-04
Loss = 2.2928e-01, PNorm = 52.0606, GNorm = 0.7289, lr_0 = 9.7490e-04
Loss = 2.2914e-01, PNorm = 52.0801, GNorm = 1.2899, lr_0 = 9.7423e-04
Loss = 2.4193e-01, PNorm = 52.1119, GNorm = 0.9864, lr_0 = 9.7356e-04
Loss = 2.3062e-01, PNorm = 52.1441, GNorm = 1.7617, lr_0 = 9.7289e-04
Loss = 2.3111e-01, PNorm = 52.1772, GNorm = 1.1129, lr_0 = 9.7223e-04
Loss = 2.1232e-01, PNorm = 52.1982, GNorm = 0.5863, lr_0 = 9.7156e-04
Loss = 2.1117e-01, PNorm = 52.2239, GNorm = 1.0942, lr_0 = 9.7090e-04
Loss = 2.3309e-01, PNorm = 52.2445, GNorm = 2.3632, lr_0 = 9.7023e-04
Loss = 2.1923e-01, PNorm = 52.2745, GNorm = 1.7731, lr_0 = 9.6957e-04
Loss = 2.5029e-01, PNorm = 52.2991, GNorm = 3.7902, lr_0 = 9.6890e-04
Loss = 2.2463e-01, PNorm = 52.3295, GNorm = 1.6729, lr_0 = 9.6824e-04
Loss = 2.3142e-01, PNorm = 52.3619, GNorm = 3.1528, lr_0 = 9.6757e-04
Loss = 2.0257e-01, PNorm = 52.3886, GNorm = 0.8477, lr_0 = 9.6691e-04
Loss = 2.4068e-01, PNorm = 52.4122, GNorm = 1.3984, lr_0 = 9.6625e-04
Loss = 2.0991e-01, PNorm = 52.4392, GNorm = 1.6408, lr_0 = 9.6559e-04
Loss = 2.0309e-01, PNorm = 52.4616, GNorm = 1.8106, lr_0 = 9.6493e-04
Loss = 2.5039e-01, PNorm = 52.4859, GNorm = 0.9903, lr_0 = 9.6427e-04
Loss = 2.4017e-01, PNorm = 52.5048, GNorm = 0.8032, lr_0 = 9.6360e-04
Loss = 1.9337e-01, PNorm = 52.5339, GNorm = 0.8578, lr_0 = 9.6294e-04
Loss = 2.4311e-01, PNorm = 52.5590, GNorm = 1.2029, lr_0 = 9.6228e-04
Loss = 2.3090e-01, PNorm = 52.5931, GNorm = 1.8646, lr_0 = 9.6163e-04
Loss = 2.2818e-01, PNorm = 52.6229, GNorm = 1.7518, lr_0 = 9.6097e-04
Loss = 2.5839e-01, PNorm = 52.6552, GNorm = 1.8568, lr_0 = 9.6031e-04
Loss = 2.3510e-01, PNorm = 52.6953, GNorm = 1.9424, lr_0 = 9.5965e-04
Loss = 2.1781e-01, PNorm = 52.7206, GNorm = 0.6802, lr_0 = 9.5899e-04
Loss = 2.0048e-01, PNorm = 52.7438, GNorm = 1.9206, lr_0 = 9.5834e-04
Loss = 2.4943e-01, PNorm = 52.7672, GNorm = 3.6729, lr_0 = 9.5768e-04
Loss = 2.2616e-01, PNorm = 52.7849, GNorm = 0.6499, lr_0 = 9.5702e-04
Loss = 2.4581e-01, PNorm = 52.8127, GNorm = 0.8132, lr_0 = 9.5637e-04
Loss = 2.2488e-01, PNorm = 52.8328, GNorm = 1.0803, lr_0 = 9.5571e-04
Loss = 2.2874e-01, PNorm = 52.8569, GNorm = 2.8190, lr_0 = 9.5506e-04
Loss = 2.4829e-01, PNorm = 52.8823, GNorm = 1.1602, lr_0 = 9.5440e-04
Loss = 2.4378e-01, PNorm = 52.9161, GNorm = 3.3668, lr_0 = 9.5375e-04
Loss = 2.1582e-01, PNorm = 52.9512, GNorm = 1.0455, lr_0 = 9.5310e-04
Loss = 2.2553e-01, PNorm = 52.9797, GNorm = 2.4904, lr_0 = 9.5244e-04
Loss = 2.3214e-01, PNorm = 53.0144, GNorm = 1.8879, lr_0 = 9.5179e-04
Loss = 2.3767e-01, PNorm = 53.0416, GNorm = 2.9875, lr_0 = 9.5114e-04
Loss = 2.1137e-01, PNorm = 53.0617, GNorm = 1.7349, lr_0 = 9.5049e-04
Loss = 2.7042e-01, PNorm = 53.0858, GNorm = 0.9285, lr_0 = 9.4984e-04
Loss = 2.2520e-01, PNorm = 53.1088, GNorm = 2.9619, lr_0 = 9.4919e-04
Loss = 2.4369e-01, PNorm = 53.1364, GNorm = 3.5126, lr_0 = 9.4854e-04
Loss = 2.2087e-01, PNorm = 53.1720, GNorm = 1.8751, lr_0 = 9.4789e-04
Loss = 2.2188e-01, PNorm = 53.2030, GNorm = 0.7454, lr_0 = 9.4724e-04
Loss = 2.2140e-01, PNorm = 53.2341, GNorm = 0.7771, lr_0 = 9.4659e-04
Loss = 2.0460e-01, PNorm = 53.2609, GNorm = 0.9678, lr_0 = 9.4594e-04
Loss = 2.1291e-01, PNorm = 53.2833, GNorm = 1.0699, lr_0 = 9.4529e-04
Loss = 2.2250e-01, PNorm = 53.3109, GNorm = 1.4913, lr_0 = 9.4464e-04
Loss = 2.0177e-01, PNorm = 53.3374, GNorm = 2.1041, lr_0 = 9.4400e-04
Loss = 2.0481e-01, PNorm = 53.3647, GNorm = 2.4425, lr_0 = 9.4335e-04
Loss = 2.4876e-01, PNorm = 53.4077, GNorm = 1.6353, lr_0 = 9.4270e-04
Loss = 2.3488e-01, PNorm = 53.4400, GNorm = 1.1725, lr_0 = 9.4206e-04
Loss = 2.2522e-01, PNorm = 53.4715, GNorm = 0.9474, lr_0 = 9.4141e-04
Loss = 2.1133e-01, PNorm = 53.4953, GNorm = 2.9075, lr_0 = 9.4077e-04
Loss = 2.2022e-01, PNorm = 53.5204, GNorm = 1.1670, lr_0 = 9.4012e-04
Loss = 1.8591e-01, PNorm = 53.5442, GNorm = 2.0204, lr_0 = 9.3948e-04
Loss = 2.4208e-01, PNorm = 53.5625, GNorm = 1.9512, lr_0 = 9.3884e-04
Loss = 1.9713e-01, PNorm = 53.5801, GNorm = 0.8477, lr_0 = 9.3819e-04
Loss = 2.2947e-01, PNorm = 53.5963, GNorm = 3.3367, lr_0 = 9.3755e-04
Loss = 2.2877e-01, PNorm = 53.6222, GNorm = 0.7835, lr_0 = 9.3691e-04
Loss = 2.6237e-01, PNorm = 53.6423, GNorm = 1.1632, lr_0 = 9.3627e-04
Loss = 2.3832e-01, PNorm = 53.6758, GNorm = 2.0011, lr_0 = 9.3562e-04
Loss = 2.3883e-01, PNorm = 53.7079, GNorm = 3.0525, lr_0 = 9.3498e-04
Loss = 2.3353e-01, PNorm = 53.7296, GNorm = 1.6107, lr_0 = 9.3434e-04
Loss = 2.2460e-01, PNorm = 53.7580, GNorm = 1.0566, lr_0 = 9.3370e-04
Loss = 2.0548e-01, PNorm = 53.7761, GNorm = 1.5033, lr_0 = 9.3306e-04
Loss = 2.1750e-01, PNorm = 53.7998, GNorm = 1.0515, lr_0 = 9.3242e-04
Loss = 2.2934e-01, PNorm = 53.8258, GNorm = 1.0906, lr_0 = 9.3178e-04
Loss = 2.1300e-01, PNorm = 53.8458, GNorm = 0.8098, lr_0 = 9.3115e-04
Loss = 2.1443e-01, PNorm = 53.8708, GNorm = 0.7887, lr_0 = 9.3051e-04
Loss = 1.9265e-01, PNorm = 53.8896, GNorm = 0.7601, lr_0 = 9.2987e-04
Loss = 2.0389e-01, PNorm = 53.9100, GNorm = 1.5782, lr_0 = 9.2923e-04
Loss = 2.0740e-01, PNorm = 53.9324, GNorm = 2.8223, lr_0 = 9.2860e-04
Loss = 2.2334e-01, PNorm = 53.9586, GNorm = 1.1583, lr_0 = 9.2796e-04
Loss = 2.1056e-01, PNorm = 53.9805, GNorm = 1.0662, lr_0 = 9.2733e-04
Loss = 2.2776e-01, PNorm = 54.0042, GNorm = 1.6418, lr_0 = 9.2669e-04
Loss = 2.3381e-01, PNorm = 54.0285, GNorm = 1.3126, lr_0 = 9.2606e-04
Loss = 2.1668e-01, PNorm = 54.0540, GNorm = 1.7332, lr_0 = 9.2542e-04
Loss = 2.0136e-01, PNorm = 54.0765, GNorm = 1.7396, lr_0 = 9.2479e-04
Loss = 2.1645e-01, PNorm = 54.1000, GNorm = 1.4139, lr_0 = 9.2415e-04
Loss = 2.3384e-01, PNorm = 54.1284, GNorm = 1.3191, lr_0 = 9.2352e-04
Loss = 2.2200e-01, PNorm = 54.1503, GNorm = 0.8824, lr_0 = 9.2289e-04
Loss = 2.2801e-01, PNorm = 54.1817, GNorm = 2.0531, lr_0 = 9.2226e-04
Loss = 2.3479e-01, PNorm = 54.2171, GNorm = 0.9174, lr_0 = 9.2162e-04
Loss = 2.0458e-01, PNorm = 54.2481, GNorm = 0.6945, lr_0 = 9.2099e-04
Validation mae = 0.288295
Epoch 3
Loss = 2.0910e-01, PNorm = 54.2700, GNorm = 1.1572, lr_0 = 9.2036e-04
Loss = 2.1011e-01, PNorm = 54.3024, GNorm = 0.7215, lr_0 = 9.1973e-04
Loss = 1.9777e-01, PNorm = 54.3272, GNorm = 0.6289, lr_0 = 9.1910e-04
Loss = 2.0062e-01, PNorm = 54.3465, GNorm = 0.8774, lr_0 = 9.1847e-04
Loss = 1.7568e-01, PNorm = 54.3709, GNorm = 0.8498, lr_0 = 9.1784e-04
Loss = 1.9997e-01, PNorm = 54.4009, GNorm = 0.8228, lr_0 = 9.1721e-04
Loss = 2.1502e-01, PNorm = 54.4277, GNorm = 3.1248, lr_0 = 9.1658e-04
Loss = 2.1305e-01, PNorm = 54.4549, GNorm = 1.2183, lr_0 = 9.1596e-04
Loss = 2.0263e-01, PNorm = 54.4826, GNorm = 1.7810, lr_0 = 9.1533e-04
Loss = 2.2346e-01, PNorm = 54.5023, GNorm = 1.3750, lr_0 = 9.1470e-04
Loss = 2.4468e-01, PNorm = 54.5311, GNorm = 0.9039, lr_0 = 9.1408e-04
Loss = 2.0620e-01, PNorm = 54.5643, GNorm = 0.9476, lr_0 = 9.1345e-04
Loss = 2.2460e-01, PNorm = 54.5916, GNorm = 1.0153, lr_0 = 9.1282e-04
Loss = 2.0981e-01, PNorm = 54.6174, GNorm = 1.3755, lr_0 = 9.1220e-04
Loss = 2.2914e-01, PNorm = 54.6477, GNorm = 2.8243, lr_0 = 9.1157e-04
Loss = 2.3891e-01, PNorm = 54.6785, GNorm = 1.8012, lr_0 = 9.1095e-04
Loss = 2.2891e-01, PNorm = 54.7161, GNorm = 0.9609, lr_0 = 9.1032e-04
Loss = 2.2696e-01, PNorm = 54.7478, GNorm = 1.5402, lr_0 = 9.0970e-04
Loss = 2.3460e-01, PNorm = 54.7811, GNorm = 1.3103, lr_0 = 9.0908e-04
Loss = 2.0923e-01, PNorm = 54.8131, GNorm = 0.5374, lr_0 = 9.0846e-04
Loss = 2.4316e-01, PNorm = 54.8437, GNorm = 1.7727, lr_0 = 9.0783e-04
Loss = 2.1101e-01, PNorm = 54.8780, GNorm = 2.0967, lr_0 = 9.0721e-04
Loss = 2.0065e-01, PNorm = 54.9029, GNorm = 0.7401, lr_0 = 9.0659e-04
Loss = 2.0740e-01, PNorm = 54.9312, GNorm = 0.9627, lr_0 = 9.0597e-04
Loss = 1.9789e-01, PNorm = 54.9566, GNorm = 1.6556, lr_0 = 9.0535e-04
Loss = 1.9062e-01, PNorm = 54.9779, GNorm = 1.4958, lr_0 = 9.0473e-04
Loss = 1.8946e-01, PNorm = 54.9940, GNorm = 0.9360, lr_0 = 9.0411e-04
Loss = 1.9037e-01, PNorm = 55.0156, GNorm = 2.9294, lr_0 = 9.0349e-04
Loss = 2.0446e-01, PNorm = 55.0436, GNorm = 1.4857, lr_0 = 9.0287e-04
Loss = 2.0956e-01, PNorm = 55.0653, GNorm = 1.8488, lr_0 = 9.0225e-04
Loss = 2.5320e-01, PNorm = 55.0901, GNorm = 1.8751, lr_0 = 9.0163e-04
Loss = 2.1513e-01, PNorm = 55.1152, GNorm = 1.0187, lr_0 = 9.0102e-04
Loss = 2.0579e-01, PNorm = 55.1386, GNorm = 1.9129, lr_0 = 9.0040e-04
Loss = 1.8573e-01, PNorm = 55.1598, GNorm = 0.6926, lr_0 = 8.9978e-04
Loss = 1.9999e-01, PNorm = 55.1840, GNorm = 0.7940, lr_0 = 8.9916e-04
Loss = 2.1880e-01, PNorm = 55.1988, GNorm = 1.6042, lr_0 = 8.9855e-04
Loss = 2.0954e-01, PNorm = 55.2310, GNorm = 1.3826, lr_0 = 8.9793e-04
Loss = 2.0405e-01, PNorm = 55.2581, GNorm = 2.7138, lr_0 = 8.9732e-04
Loss = 2.1521e-01, PNorm = 55.2777, GNorm = 1.1455, lr_0 = 8.9670e-04
Loss = 1.9523e-01, PNorm = 55.3023, GNorm = 1.2175, lr_0 = 8.9609e-04
Loss = 2.0415e-01, PNorm = 55.3282, GNorm = 0.7678, lr_0 = 8.9548e-04
Loss = 2.0367e-01, PNorm = 55.3536, GNorm = 1.1868, lr_0 = 8.9486e-04
Loss = 2.0535e-01, PNorm = 55.3736, GNorm = 2.3433, lr_0 = 8.9425e-04
Loss = 2.0069e-01, PNorm = 55.4051, GNorm = 1.6240, lr_0 = 8.9364e-04
Loss = 2.2244e-01, PNorm = 55.4408, GNorm = 1.6234, lr_0 = 8.9302e-04
Loss = 1.8919e-01, PNorm = 55.4629, GNorm = 0.9616, lr_0 = 8.9241e-04
Loss = 2.0504e-01, PNorm = 55.4816, GNorm = 0.8058, lr_0 = 8.9180e-04
Loss = 2.0799e-01, PNorm = 55.5011, GNorm = 0.8721, lr_0 = 8.9119e-04
Loss = 1.8555e-01, PNorm = 55.5335, GNorm = 1.5630, lr_0 = 8.9058e-04
Loss = 1.7702e-01, PNorm = 55.5579, GNorm = 1.0871, lr_0 = 8.8997e-04
Loss = 2.3239e-01, PNorm = 55.5854, GNorm = 4.2100, lr_0 = 8.8936e-04
Loss = 2.4886e-01, PNorm = 55.6315, GNorm = 0.7804, lr_0 = 8.8875e-04
Loss = 1.9210e-01, PNorm = 55.6765, GNorm = 1.0979, lr_0 = 8.8814e-04
Loss = 2.0650e-01, PNorm = 55.7095, GNorm = 1.6659, lr_0 = 8.8753e-04
Loss = 2.0931e-01, PNorm = 55.7411, GNorm = 0.8224, lr_0 = 8.8693e-04
Loss = 1.8443e-01, PNorm = 55.7663, GNorm = 1.1728, lr_0 = 8.8632e-04
Loss = 1.7533e-01, PNorm = 55.7869, GNorm = 0.8971, lr_0 = 8.8571e-04
Loss = 2.2222e-01, PNorm = 55.8085, GNorm = 0.8289, lr_0 = 8.8510e-04
Loss = 2.0593e-01, PNorm = 55.8433, GNorm = 1.0743, lr_0 = 8.8450e-04
Loss = 1.8692e-01, PNorm = 55.8761, GNorm = 1.1076, lr_0 = 8.8389e-04
Loss = 2.1853e-01, PNorm = 55.9105, GNorm = 1.3247, lr_0 = 8.8329e-04
Loss = 1.9504e-01, PNorm = 55.9498, GNorm = 1.2126, lr_0 = 8.8268e-04
Loss = 1.9462e-01, PNorm = 55.9802, GNorm = 0.8862, lr_0 = 8.8208e-04
Loss = 1.8485e-01, PNorm = 55.9992, GNorm = 0.9584, lr_0 = 8.8147e-04
Loss = 1.9105e-01, PNorm = 56.0232, GNorm = 1.4308, lr_0 = 8.8087e-04
Loss = 2.1214e-01, PNorm = 56.0417, GNorm = 1.0816, lr_0 = 8.8026e-04
Loss = 2.3086e-01, PNorm = 56.0702, GNorm = 1.6608, lr_0 = 8.7966e-04
Loss = 2.1075e-01, PNorm = 56.0852, GNorm = 1.1807, lr_0 = 8.7906e-04
Loss = 2.2306e-01, PNorm = 56.1022, GNorm = 1.9933, lr_0 = 8.7846e-04
Loss = 2.2383e-01, PNorm = 56.1277, GNorm = 1.1617, lr_0 = 8.7785e-04
Loss = 2.0827e-01, PNorm = 56.1597, GNorm = 1.2984, lr_0 = 8.7725e-04
Loss = 1.8918e-01, PNorm = 56.1852, GNorm = 1.2827, lr_0 = 8.7665e-04
Loss = 2.2653e-01, PNorm = 56.2027, GNorm = 2.2059, lr_0 = 8.7605e-04
Loss = 1.8480e-01, PNorm = 56.2296, GNorm = 1.2693, lr_0 = 8.7545e-04
Loss = 2.1839e-01, PNorm = 56.2619, GNorm = 1.3486, lr_0 = 8.7485e-04
Loss = 2.1629e-01, PNorm = 56.2898, GNorm = 1.4103, lr_0 = 8.7425e-04
Loss = 1.8324e-01, PNorm = 56.3207, GNorm = 0.7229, lr_0 = 8.7365e-04
Loss = 1.9718e-01, PNorm = 56.3383, GNorm = 1.0794, lr_0 = 8.7306e-04
Loss = 2.0577e-01, PNorm = 56.3652, GNorm = 1.3371, lr_0 = 8.7246e-04
Loss = 2.1949e-01, PNorm = 56.3871, GNorm = 0.9363, lr_0 = 8.7186e-04
Loss = 2.0468e-01, PNorm = 56.4173, GNorm = 2.6052, lr_0 = 8.7126e-04
Loss = 1.9269e-01, PNorm = 56.4547, GNorm = 1.0968, lr_0 = 8.7067e-04
Loss = 1.8757e-01, PNorm = 56.4706, GNorm = 1.6051, lr_0 = 8.7007e-04
Loss = 2.0842e-01, PNorm = 56.4960, GNorm = 1.0145, lr_0 = 8.6947e-04
Loss = 1.6872e-01, PNorm = 56.5260, GNorm = 2.1098, lr_0 = 8.6888e-04
Loss = 1.5746e-01, PNorm = 56.5476, GNorm = 0.5170, lr_0 = 8.6828e-04
Loss = 2.0568e-01, PNorm = 56.5643, GNorm = 0.7866, lr_0 = 8.6769e-04
Loss = 1.8628e-01, PNorm = 56.5827, GNorm = 1.3285, lr_0 = 8.6709e-04
Loss = 1.8120e-01, PNorm = 56.6022, GNorm = 2.3621, lr_0 = 8.6650e-04
Loss = 2.1345e-01, PNorm = 56.6324, GNorm = 0.8349, lr_0 = 8.6590e-04
Loss = 2.1604e-01, PNorm = 56.6440, GNorm = 0.9374, lr_0 = 8.6531e-04
Loss = 2.2776e-01, PNorm = 56.6737, GNorm = 1.6376, lr_0 = 8.6472e-04
Loss = 1.9560e-01, PNorm = 56.6963, GNorm = 0.8417, lr_0 = 8.6413e-04
Loss = 1.9462e-01, PNorm = 56.7286, GNorm = 1.3004, lr_0 = 8.6353e-04
Loss = 2.2037e-01, PNorm = 56.7579, GNorm = 0.8064, lr_0 = 8.6294e-04
Loss = 1.9658e-01, PNorm = 56.7776, GNorm = 0.8821, lr_0 = 8.6235e-04
Loss = 1.9757e-01, PNorm = 56.7912, GNorm = 1.9276, lr_0 = 8.6176e-04
Loss = 1.7858e-01, PNorm = 56.8176, GNorm = 1.2935, lr_0 = 8.6117e-04
Loss = 2.1631e-01, PNorm = 56.8386, GNorm = 2.3062, lr_0 = 8.6058e-04
Loss = 2.1938e-01, PNorm = 56.8723, GNorm = 1.1655, lr_0 = 8.5999e-04
Loss = 2.1267e-01, PNorm = 56.9057, GNorm = 1.3866, lr_0 = 8.5940e-04
Loss = 2.3233e-01, PNorm = 56.9419, GNorm = 1.1047, lr_0 = 8.5881e-04
Loss = 1.9230e-01, PNorm = 56.9756, GNorm = 1.1875, lr_0 = 8.5823e-04
Loss = 2.0760e-01, PNorm = 56.9995, GNorm = 1.5497, lr_0 = 8.5764e-04
Loss = 2.3027e-01, PNorm = 57.0303, GNorm = 1.9902, lr_0 = 8.5705e-04
Loss = 1.9985e-01, PNorm = 57.0491, GNorm = 1.0400, lr_0 = 8.5646e-04
Loss = 1.8335e-01, PNorm = 57.0726, GNorm = 0.7838, lr_0 = 8.5588e-04
Loss = 1.9469e-01, PNorm = 57.0886, GNorm = 1.5373, lr_0 = 8.5529e-04
Loss = 2.0122e-01, PNorm = 57.1113, GNorm = 0.9358, lr_0 = 8.5470e-04
Loss = 1.8756e-01, PNorm = 57.1330, GNorm = 2.4721, lr_0 = 8.5412e-04
Loss = 2.0431e-01, PNorm = 57.1592, GNorm = 0.7383, lr_0 = 8.5353e-04
Loss = 2.0119e-01, PNorm = 57.1873, GNorm = 0.5933, lr_0 = 8.5295e-04
Loss = 1.9884e-01, PNorm = 57.2118, GNorm = 0.7736, lr_0 = 8.5236e-04
Loss = 1.9159e-01, PNorm = 57.2331, GNorm = 0.9756, lr_0 = 8.5178e-04
Loss = 1.8171e-01, PNorm = 57.2513, GNorm = 0.7839, lr_0 = 8.5120e-04
Loss = 1.9976e-01, PNorm = 57.2726, GNorm = 1.8782, lr_0 = 8.5061e-04
Loss = 1.9408e-01, PNorm = 57.2914, GNorm = 0.8246, lr_0 = 8.5003e-04
Loss = 1.9986e-01, PNorm = 57.3186, GNorm = 0.5915, lr_0 = 8.4945e-04
Loss = 2.0790e-01, PNorm = 57.3416, GNorm = 0.8850, lr_0 = 8.4887e-04
Loss = 2.0920e-01, PNorm = 57.3671, GNorm = 1.8925, lr_0 = 8.4828e-04
Validation mae = 0.258306
Epoch 4
Loss = 1.9455e-01, PNorm = 57.3945, GNorm = 0.8549, lr_0 = 8.4770e-04
Loss = 1.8505e-01, PNorm = 57.4194, GNorm = 1.1781, lr_0 = 8.4712e-04
Loss = 1.8595e-01, PNorm = 57.4444, GNorm = 1.0257, lr_0 = 8.4654e-04
Loss = 1.7356e-01, PNorm = 57.4682, GNorm = 1.6248, lr_0 = 8.4596e-04
Loss = 1.8637e-01, PNorm = 57.4999, GNorm = 1.3269, lr_0 = 8.4538e-04
Loss = 1.8545e-01, PNorm = 57.5291, GNorm = 0.7556, lr_0 = 8.4480e-04
Loss = 1.9094e-01, PNorm = 57.5568, GNorm = 1.0991, lr_0 = 8.4423e-04
Loss = 1.7728e-01, PNorm = 57.5791, GNorm = 0.8818, lr_0 = 8.4365e-04
Loss = 1.8490e-01, PNorm = 57.6049, GNorm = 0.8171, lr_0 = 8.4307e-04
Loss = 1.9908e-01, PNorm = 57.6236, GNorm = 0.5751, lr_0 = 8.4249e-04
Loss = 1.6089e-01, PNorm = 57.6464, GNorm = 1.5203, lr_0 = 8.4191e-04
Loss = 1.8522e-01, PNorm = 57.6718, GNorm = 0.7826, lr_0 = 8.4134e-04
Loss = 1.7439e-01, PNorm = 57.6902, GNorm = 1.0767, lr_0 = 8.4076e-04
Loss = 1.8308e-01, PNorm = 57.7076, GNorm = 0.7096, lr_0 = 8.4019e-04
Loss = 1.8891e-01, PNorm = 57.7296, GNorm = 1.4953, lr_0 = 8.3961e-04
Loss = 2.0544e-01, PNorm = 57.7544, GNorm = 1.2860, lr_0 = 8.3903e-04
Loss = 1.7371e-01, PNorm = 57.7795, GNorm = 0.7993, lr_0 = 8.3846e-04
Loss = 1.5746e-01, PNorm = 57.7978, GNorm = 0.5347, lr_0 = 8.3789e-04
Loss = 2.2074e-01, PNorm = 57.8207, GNorm = 1.5250, lr_0 = 8.3731e-04
Loss = 1.8531e-01, PNorm = 57.8579, GNorm = 2.9939, lr_0 = 8.3674e-04
Loss = 1.8893e-01, PNorm = 57.8861, GNorm = 0.7420, lr_0 = 8.3616e-04
Loss = 1.8513e-01, PNorm = 57.9159, GNorm = 0.9794, lr_0 = 8.3559e-04
Loss = 1.8690e-01, PNorm = 57.9399, GNorm = 0.7981, lr_0 = 8.3502e-04
Loss = 2.0050e-01, PNorm = 57.9610, GNorm = 1.1375, lr_0 = 8.3445e-04
Loss = 1.9170e-01, PNorm = 57.9941, GNorm = 1.5515, lr_0 = 8.3388e-04
Loss = 2.0207e-01, PNorm = 58.0192, GNorm = 2.3518, lr_0 = 8.3330e-04
Loss = 1.6058e-01, PNorm = 58.0460, GNorm = 1.2908, lr_0 = 8.3273e-04
Loss = 1.8720e-01, PNorm = 58.0637, GNorm = 1.7728, lr_0 = 8.3216e-04
Loss = 1.8916e-01, PNorm = 58.0868, GNorm = 1.1103, lr_0 = 8.3159e-04
Loss = 1.9880e-01, PNorm = 58.1076, GNorm = 0.8111, lr_0 = 8.3102e-04
Loss = 1.8746e-01, PNorm = 58.1298, GNorm = 0.8525, lr_0 = 8.3045e-04
Loss = 1.9229e-01, PNorm = 58.1469, GNorm = 1.1503, lr_0 = 8.2988e-04
Loss = 1.9877e-01, PNorm = 58.1715, GNorm = 1.0373, lr_0 = 8.2932e-04
Loss = 1.8461e-01, PNorm = 58.2025, GNorm = 1.1340, lr_0 = 8.2875e-04
Loss = 1.5817e-01, PNorm = 58.2121, GNorm = 0.8123, lr_0 = 8.2818e-04
Loss = 2.0800e-01, PNorm = 58.2307, GNorm = 1.1476, lr_0 = 8.2761e-04
Loss = 1.9038e-01, PNorm = 58.2573, GNorm = 1.0627, lr_0 = 8.2705e-04
Loss = 1.7217e-01, PNorm = 58.2838, GNorm = 1.0611, lr_0 = 8.2648e-04
Loss = 1.9725e-01, PNorm = 58.3120, GNorm = 1.8472, lr_0 = 8.2591e-04
Loss = 1.7869e-01, PNorm = 58.3402, GNorm = 2.4067, lr_0 = 8.2535e-04
Loss = 1.8532e-01, PNorm = 58.3734, GNorm = 0.5187, lr_0 = 8.2478e-04
Loss = 1.9265e-01, PNorm = 58.3964, GNorm = 0.9059, lr_0 = 8.2422e-04
Loss = 2.1739e-01, PNorm = 58.4191, GNorm = 1.7079, lr_0 = 8.2365e-04
Loss = 2.0314e-01, PNorm = 58.4478, GNorm = 1.0607, lr_0 = 8.2309e-04
Loss = 1.9953e-01, PNorm = 58.4777, GNorm = 0.6043, lr_0 = 8.2252e-04
Loss = 1.8849e-01, PNorm = 58.5071, GNorm = 1.7852, lr_0 = 8.2196e-04
Loss = 1.9356e-01, PNorm = 58.5306, GNorm = 0.9344, lr_0 = 8.2140e-04
Loss = 1.8031e-01, PNorm = 58.5590, GNorm = 0.6184, lr_0 = 8.2084e-04
Loss = 2.0888e-01, PNorm = 58.5850, GNorm = 2.0378, lr_0 = 8.2027e-04
Loss = 1.6900e-01, PNorm = 58.6150, GNorm = 0.5661, lr_0 = 8.1971e-04
Loss = 1.9918e-01, PNorm = 58.6431, GNorm = 1.5043, lr_0 = 8.1915e-04
Loss = 1.9746e-01, PNorm = 58.6666, GNorm = 3.6663, lr_0 = 8.1859e-04
Loss = 2.2196e-01, PNorm = 58.6960, GNorm = 0.8337, lr_0 = 8.1803e-04
Loss = 1.9628e-01, PNorm = 58.7249, GNorm = 1.3616, lr_0 = 8.1747e-04
Loss = 1.7220e-01, PNorm = 58.7459, GNorm = 1.4410, lr_0 = 8.1691e-04
Loss = 2.0126e-01, PNorm = 58.7656, GNorm = 0.8816, lr_0 = 8.1635e-04
Loss = 1.8364e-01, PNorm = 58.7923, GNorm = 0.9481, lr_0 = 8.1579e-04
Loss = 1.7303e-01, PNorm = 58.8207, GNorm = 0.7537, lr_0 = 8.1523e-04
Loss = 1.9175e-01, PNorm = 58.8480, GNorm = 1.6165, lr_0 = 8.1467e-04
Loss = 1.8351e-01, PNorm = 58.8745, GNorm = 0.6482, lr_0 = 8.1411e-04
Loss = 1.7469e-01, PNorm = 58.8965, GNorm = 1.5775, lr_0 = 8.1355e-04
Loss = 1.6289e-01, PNorm = 58.9191, GNorm = 0.7537, lr_0 = 8.1300e-04
Loss = 1.8597e-01, PNorm = 58.9507, GNorm = 3.7784, lr_0 = 8.1244e-04
Loss = 2.0146e-01, PNorm = 58.9813, GNorm = 0.8166, lr_0 = 8.1188e-04
Loss = 1.9581e-01, PNorm = 59.0099, GNorm = 0.6257, lr_0 = 8.1133e-04
Loss = 1.5600e-01, PNorm = 59.0318, GNorm = 1.1611, lr_0 = 8.1077e-04
Loss = 1.9711e-01, PNorm = 59.0613, GNorm = 0.9423, lr_0 = 8.1022e-04
Loss = 1.9462e-01, PNorm = 59.0886, GNorm = 0.5952, lr_0 = 8.0966e-04
Loss = 1.9462e-01, PNorm = 59.1135, GNorm = 0.7318, lr_0 = 8.0911e-04
Loss = 1.8894e-01, PNorm = 59.1385, GNorm = 0.8912, lr_0 = 8.0855e-04
Loss = 1.9376e-01, PNorm = 59.1695, GNorm = 1.0268, lr_0 = 8.0800e-04
Loss = 1.7785e-01, PNorm = 59.1959, GNorm = 1.2444, lr_0 = 8.0745e-04
Loss = 2.3110e-01, PNorm = 59.2210, GNorm = 1.4664, lr_0 = 8.0689e-04
Loss = 1.8271e-01, PNorm = 59.2483, GNorm = 0.7920, lr_0 = 8.0634e-04
Loss = 1.9148e-01, PNorm = 59.2742, GNorm = 0.7668, lr_0 = 8.0579e-04
Loss = 1.7794e-01, PNorm = 59.2922, GNorm = 1.0481, lr_0 = 8.0523e-04
Loss = 1.8398e-01, PNorm = 59.3210, GNorm = 1.2946, lr_0 = 8.0468e-04
Loss = 1.6617e-01, PNorm = 59.3389, GNorm = 0.6313, lr_0 = 8.0413e-04
Loss = 2.0041e-01, PNorm = 59.3638, GNorm = 1.3598, lr_0 = 8.0358e-04
Loss = 1.8468e-01, PNorm = 59.3873, GNorm = 1.0892, lr_0 = 8.0303e-04
Loss = 2.1488e-01, PNorm = 59.4080, GNorm = 1.0857, lr_0 = 8.0248e-04
Loss = 2.0050e-01, PNorm = 59.4351, GNorm = 0.8571, lr_0 = 8.0193e-04
Loss = 1.9064e-01, PNorm = 59.4598, GNorm = 0.7977, lr_0 = 8.0138e-04
Loss = 1.6342e-01, PNorm = 59.4824, GNorm = 1.0228, lr_0 = 8.0083e-04
Loss = 1.8186e-01, PNorm = 59.5047, GNorm = 0.7446, lr_0 = 8.0028e-04
Loss = 1.8021e-01, PNorm = 59.5281, GNorm = 1.0663, lr_0 = 7.9974e-04
Loss = 1.5962e-01, PNorm = 59.5509, GNorm = 1.5874, lr_0 = 7.9919e-04
Loss = 1.9884e-01, PNorm = 59.5734, GNorm = 1.8268, lr_0 = 7.9864e-04
Loss = 1.7891e-01, PNorm = 59.5929, GNorm = 0.7236, lr_0 = 7.9809e-04
Loss = 2.0194e-01, PNorm = 59.6231, GNorm = 0.8903, lr_0 = 7.9755e-04
Loss = 1.8879e-01, PNorm = 59.6367, GNorm = 0.8565, lr_0 = 7.9700e-04
Loss = 1.8934e-01, PNorm = 59.6674, GNorm = 1.2512, lr_0 = 7.9645e-04
Loss = 1.8887e-01, PNorm = 59.6960, GNorm = 1.5878, lr_0 = 7.9591e-04
Loss = 1.9796e-01, PNorm = 59.7346, GNorm = 0.7597, lr_0 = 7.9536e-04
Loss = 1.8322e-01, PNorm = 59.7695, GNorm = 1.6444, lr_0 = 7.9482e-04
Loss = 1.9378e-01, PNorm = 59.7957, GNorm = 1.2235, lr_0 = 7.9427e-04
Loss = 1.8669e-01, PNorm = 59.8193, GNorm = 0.7977, lr_0 = 7.9373e-04
Loss = 1.8431e-01, PNorm = 59.8425, GNorm = 0.5035, lr_0 = 7.9319e-04
Loss = 1.9762e-01, PNorm = 59.8589, GNorm = 1.5451, lr_0 = 7.9264e-04
Loss = 1.6624e-01, PNorm = 59.8828, GNorm = 1.3370, lr_0 = 7.9210e-04
Loss = 1.9724e-01, PNorm = 59.9110, GNorm = 0.5640, lr_0 = 7.9156e-04
Loss = 1.8389e-01, PNorm = 59.9438, GNorm = 0.8236, lr_0 = 7.9101e-04
Loss = 1.7055e-01, PNorm = 59.9691, GNorm = 1.0641, lr_0 = 7.9047e-04
Loss = 1.7779e-01, PNorm = 59.9857, GNorm = 1.0516, lr_0 = 7.8993e-04
Loss = 1.9390e-01, PNorm = 60.0061, GNorm = 1.4915, lr_0 = 7.8939e-04
Loss = 1.8477e-01, PNorm = 60.0273, GNorm = 1.6715, lr_0 = 7.8885e-04
Loss = 1.8430e-01, PNorm = 60.0507, GNorm = 0.7041, lr_0 = 7.8831e-04
Loss = 1.9481e-01, PNorm = 60.0735, GNorm = 1.1449, lr_0 = 7.8777e-04
Loss = 1.7016e-01, PNorm = 60.1016, GNorm = 0.6798, lr_0 = 7.8723e-04
Loss = 1.8267e-01, PNorm = 60.1189, GNorm = 0.6945, lr_0 = 7.8669e-04
Loss = 2.0391e-01, PNorm = 60.1490, GNorm = 1.0243, lr_0 = 7.8615e-04
Loss = 1.8751e-01, PNorm = 60.1732, GNorm = 0.9684, lr_0 = 7.8561e-04
Loss = 1.5485e-01, PNorm = 60.1979, GNorm = 0.7497, lr_0 = 7.8507e-04
Loss = 1.9896e-01, PNorm = 60.2201, GNorm = 0.9235, lr_0 = 7.8454e-04
Loss = 1.7194e-01, PNorm = 60.2481, GNorm = 0.5147, lr_0 = 7.8400e-04
Loss = 1.8223e-01, PNorm = 60.2638, GNorm = 0.8774, lr_0 = 7.8346e-04
Loss = 1.7822e-01, PNorm = 60.2914, GNorm = 0.8312, lr_0 = 7.8293e-04
Loss = 1.8195e-01, PNorm = 60.3115, GNorm = 0.8688, lr_0 = 7.8239e-04
Loss = 1.8883e-01, PNorm = 60.3293, GNorm = 0.5978, lr_0 = 7.8185e-04
Loss = 1.8527e-01, PNorm = 60.3525, GNorm = 0.7103, lr_0 = 7.8132e-04
Validation mae = 0.251884
Epoch 5
Loss = 1.8574e-01, PNorm = 60.3789, GNorm = 1.0003, lr_0 = 7.8078e-04
Loss = 1.5993e-01, PNorm = 60.4063, GNorm = 0.8472, lr_0 = 7.8025e-04
Loss = 2.1166e-01, PNorm = 60.4365, GNorm = 0.9745, lr_0 = 7.7971e-04
Loss = 1.7480e-01, PNorm = 60.4674, GNorm = 1.2541, lr_0 = 7.7918e-04
Loss = 1.7638e-01, PNorm = 60.4985, GNorm = 1.0288, lr_0 = 7.7864e-04
Loss = 1.8143e-01, PNorm = 60.5244, GNorm = 1.4843, lr_0 = 7.7811e-04
Loss = 1.8938e-01, PNorm = 60.5499, GNorm = 0.6301, lr_0 = 7.7758e-04
Loss = 1.6498e-01, PNorm = 60.5673, GNorm = 1.0350, lr_0 = 7.7705e-04
Loss = 1.7566e-01, PNorm = 60.5880, GNorm = 0.9304, lr_0 = 7.7651e-04
Loss = 2.0999e-01, PNorm = 60.6121, GNorm = 1.2577, lr_0 = 7.7598e-04
Loss = 1.8826e-01, PNorm = 60.6409, GNorm = 0.9433, lr_0 = 7.7545e-04
Loss = 1.5796e-01, PNorm = 60.6666, GNorm = 1.0849, lr_0 = 7.7492e-04
Loss = 1.5699e-01, PNorm = 60.6922, GNorm = 0.8517, lr_0 = 7.7439e-04
Loss = 1.6456e-01, PNorm = 60.7118, GNorm = 0.9887, lr_0 = 7.7386e-04
Loss = 2.0313e-01, PNorm = 60.7420, GNorm = 1.5321, lr_0 = 7.7333e-04
Loss = 2.1074e-01, PNorm = 60.7710, GNorm = 0.6509, lr_0 = 7.7280e-04
Loss = 2.1634e-01, PNorm = 60.8073, GNorm = 1.1034, lr_0 = 7.7227e-04
Loss = 1.8496e-01, PNorm = 60.8464, GNorm = 0.9270, lr_0 = 7.7174e-04
Loss = 1.8229e-01, PNorm = 60.8763, GNorm = 1.0370, lr_0 = 7.7121e-04
Loss = 1.5038e-01, PNorm = 60.9026, GNorm = 0.6302, lr_0 = 7.7068e-04
Loss = 1.7938e-01, PNorm = 60.9248, GNorm = 1.3289, lr_0 = 7.7015e-04
Loss = 1.8506e-01, PNorm = 60.9459, GNorm = 0.7741, lr_0 = 7.6963e-04
Loss = 2.0937e-01, PNorm = 60.9746, GNorm = 1.0606, lr_0 = 7.6910e-04
Loss = 1.8193e-01, PNorm = 60.9906, GNorm = 1.1219, lr_0 = 7.6857e-04
Loss = 1.8778e-01, PNorm = 61.0168, GNorm = 0.5829, lr_0 = 7.6805e-04
Loss = 1.6293e-01, PNorm = 61.0456, GNorm = 0.7461, lr_0 = 7.6752e-04
Loss = 1.6599e-01, PNorm = 61.0812, GNorm = 1.5659, lr_0 = 7.6699e-04
Loss = 2.1310e-01, PNorm = 61.1046, GNorm = 1.2078, lr_0 = 7.6647e-04
Loss = 1.8710e-01, PNorm = 61.1333, GNorm = 1.7300, lr_0 = 7.6594e-04
Loss = 1.6541e-01, PNorm = 61.1518, GNorm = 0.5661, lr_0 = 7.6542e-04
Loss = 2.0220e-01, PNorm = 61.1693, GNorm = 0.8856, lr_0 = 7.6489e-04
Loss = 1.8314e-01, PNorm = 61.1901, GNorm = 1.8118, lr_0 = 7.6437e-04
Loss = 1.8951e-01, PNorm = 61.2133, GNorm = 0.5954, lr_0 = 7.6385e-04
Loss = 1.8369e-01, PNorm = 61.2313, GNorm = 1.4754, lr_0 = 7.6332e-04
Loss = 1.9005e-01, PNorm = 61.2508, GNorm = 0.6410, lr_0 = 7.6280e-04
Loss = 1.7996e-01, PNorm = 61.2741, GNorm = 0.8525, lr_0 = 7.6228e-04
Loss = 1.7466e-01, PNorm = 61.2981, GNorm = 0.7042, lr_0 = 7.6176e-04
Loss = 1.9537e-01, PNorm = 61.3264, GNorm = 1.1438, lr_0 = 7.6123e-04
Loss = 1.7558e-01, PNorm = 61.3534, GNorm = 1.4038, lr_0 = 7.6071e-04
Loss = 1.5722e-01, PNorm = 61.3758, GNorm = 0.5663, lr_0 = 7.6019e-04
Loss = 1.7485e-01, PNorm = 61.4002, GNorm = 2.1411, lr_0 = 7.5967e-04
Loss = 2.0194e-01, PNorm = 61.4177, GNorm = 1.1178, lr_0 = 7.5915e-04
Loss = 1.6716e-01, PNorm = 61.4407, GNorm = 0.9697, lr_0 = 7.5863e-04
Loss = 1.8518e-01, PNorm = 61.4739, GNorm = 0.8915, lr_0 = 7.5811e-04
Loss = 1.6912e-01, PNorm = 61.5028, GNorm = 0.7171, lr_0 = 7.5759e-04
Loss = 2.0484e-01, PNorm = 61.5272, GNorm = 0.7867, lr_0 = 7.5707e-04
Loss = 1.7898e-01, PNorm = 61.5435, GNorm = 1.0239, lr_0 = 7.5655e-04
Loss = 1.8251e-01, PNorm = 61.5661, GNorm = 1.3354, lr_0 = 7.5603e-04
Loss = 1.8745e-01, PNorm = 61.5935, GNorm = 0.5993, lr_0 = 7.5552e-04
Loss = 1.5692e-01, PNorm = 61.6221, GNorm = 0.7927, lr_0 = 7.5500e-04
Loss = 1.7868e-01, PNorm = 61.6431, GNorm = 0.9359, lr_0 = 7.5448e-04
Loss = 1.8284e-01, PNorm = 61.6686, GNorm = 1.4762, lr_0 = 7.5397e-04
Loss = 1.7690e-01, PNorm = 61.6935, GNorm = 1.2213, lr_0 = 7.5345e-04
Loss = 1.6837e-01, PNorm = 61.7174, GNorm = 0.6306, lr_0 = 7.5293e-04
Loss = 1.8376e-01, PNorm = 61.7388, GNorm = 0.9851, lr_0 = 7.5242e-04
Loss = 1.5765e-01, PNorm = 61.7671, GNorm = 0.7968, lr_0 = 7.5190e-04
Loss = 1.8345e-01, PNorm = 61.7984, GNorm = 1.6364, lr_0 = 7.5139e-04
Loss = 1.6416e-01, PNorm = 61.8298, GNorm = 1.2107, lr_0 = 7.5087e-04
Loss = 1.7353e-01, PNorm = 61.8484, GNorm = 1.2961, lr_0 = 7.5036e-04
Loss = 1.9548e-01, PNorm = 61.8746, GNorm = 0.8479, lr_0 = 7.4984e-04
Loss = 1.9414e-01, PNorm = 61.9018, GNorm = 0.9581, lr_0 = 7.4933e-04
Loss = 1.9233e-01, PNorm = 61.9268, GNorm = 0.8071, lr_0 = 7.4882e-04
Loss = 1.9862e-01, PNorm = 61.9560, GNorm = 0.7176, lr_0 = 7.4830e-04
Loss = 1.7724e-01, PNorm = 61.9766, GNorm = 0.8838, lr_0 = 7.4779e-04
Loss = 1.7404e-01, PNorm = 62.0009, GNorm = 0.5927, lr_0 = 7.4728e-04
Loss = 1.6647e-01, PNorm = 62.0240, GNorm = 0.4993, lr_0 = 7.4677e-04
Loss = 1.6243e-01, PNorm = 62.0526, GNorm = 1.0097, lr_0 = 7.4625e-04
Loss = 1.6754e-01, PNorm = 62.0773, GNorm = 0.7547, lr_0 = 7.4574e-04
Loss = 1.8535e-01, PNorm = 62.1020, GNorm = 1.4137, lr_0 = 7.4523e-04
Loss = 1.7279e-01, PNorm = 62.1219, GNorm = 1.0243, lr_0 = 7.4472e-04
Loss = 1.4819e-01, PNorm = 62.1422, GNorm = 0.6722, lr_0 = 7.4421e-04
Loss = 1.7055e-01, PNorm = 62.1606, GNorm = 0.9909, lr_0 = 7.4370e-04
Loss = 1.8580e-01, PNorm = 62.1823, GNorm = 0.9103, lr_0 = 7.4319e-04
Loss = 1.7584e-01, PNorm = 62.2077, GNorm = 1.1209, lr_0 = 7.4268e-04
Loss = 1.6667e-01, PNorm = 62.2276, GNorm = 0.6933, lr_0 = 7.4217e-04
Loss = 1.4853e-01, PNorm = 62.2486, GNorm = 1.3655, lr_0 = 7.4167e-04
Loss = 1.8039e-01, PNorm = 62.2659, GNorm = 0.7538, lr_0 = 7.4116e-04
Loss = 1.7151e-01, PNorm = 62.2985, GNorm = 0.8494, lr_0 = 7.4065e-04
Loss = 1.8796e-01, PNorm = 62.3205, GNorm = 0.7348, lr_0 = 7.4014e-04
Loss = 2.0765e-01, PNorm = 62.3514, GNorm = 0.8642, lr_0 = 7.3964e-04
Loss = 1.6230e-01, PNorm = 62.3672, GNorm = 0.9729, lr_0 = 7.3913e-04
Loss = 1.7705e-01, PNorm = 62.3946, GNorm = 0.6627, lr_0 = 7.3862e-04
Loss = 1.7526e-01, PNorm = 62.4173, GNorm = 0.7426, lr_0 = 7.3812e-04
Loss = 1.8305e-01, PNorm = 62.4376, GNorm = 1.0596, lr_0 = 7.3761e-04
Loss = 1.7704e-01, PNorm = 62.4608, GNorm = 0.4943, lr_0 = 7.3711e-04
Loss = 1.7385e-01, PNorm = 62.4806, GNorm = 0.6361, lr_0 = 7.3660e-04
Loss = 1.8308e-01, PNorm = 62.5049, GNorm = 0.7250, lr_0 = 7.3610e-04
Loss = 1.6634e-01, PNorm = 62.5228, GNorm = 0.6974, lr_0 = 7.3559e-04
Loss = 1.7821e-01, PNorm = 62.5446, GNorm = 1.1158, lr_0 = 7.3509e-04
Loss = 1.7196e-01, PNorm = 62.5601, GNorm = 0.9322, lr_0 = 7.3458e-04
Loss = 1.7552e-01, PNorm = 62.5927, GNorm = 1.5502, lr_0 = 7.3408e-04
Loss = 1.6556e-01, PNorm = 62.6075, GNorm = 0.7166, lr_0 = 7.3358e-04
Loss = 1.7395e-01, PNorm = 62.6322, GNorm = 0.9639, lr_0 = 7.3308e-04
Loss = 1.6489e-01, PNorm = 62.6609, GNorm = 0.6247, lr_0 = 7.3257e-04
Loss = 1.7659e-01, PNorm = 62.6775, GNorm = 0.9185, lr_0 = 7.3207e-04
Loss = 1.4365e-01, PNorm = 62.6918, GNorm = 0.6114, lr_0 = 7.3157e-04
Loss = 1.6842e-01, PNorm = 62.7084, GNorm = 0.7269, lr_0 = 7.3107e-04
Loss = 1.7710e-01, PNorm = 62.7315, GNorm = 0.8438, lr_0 = 7.3057e-04
Loss = 1.7686e-01, PNorm = 62.7580, GNorm = 0.6260, lr_0 = 7.3007e-04
Loss = 1.6766e-01, PNorm = 62.7746, GNorm = 0.6173, lr_0 = 7.2957e-04
Loss = 1.8874e-01, PNorm = 62.7912, GNorm = 1.0356, lr_0 = 7.2907e-04
Loss = 1.7045e-01, PNorm = 62.8076, GNorm = 0.6027, lr_0 = 7.2857e-04
Loss = 1.6835e-01, PNorm = 62.8305, GNorm = 1.2981, lr_0 = 7.2807e-04
Loss = 1.6572e-01, PNorm = 62.8446, GNorm = 0.6311, lr_0 = 7.2757e-04
Loss = 1.8618e-01, PNorm = 62.8653, GNorm = 0.6185, lr_0 = 7.2707e-04
Loss = 1.6354e-01, PNorm = 62.8918, GNorm = 0.7946, lr_0 = 7.2657e-04
Loss = 1.8998e-01, PNorm = 62.9149, GNorm = 1.3901, lr_0 = 7.2608e-04
Loss = 1.7428e-01, PNorm = 62.9430, GNorm = 1.1569, lr_0 = 7.2558e-04
Loss = 1.7049e-01, PNorm = 62.9589, GNorm = 0.7023, lr_0 = 7.2508e-04
Loss = 1.6920e-01, PNorm = 62.9816, GNorm = 0.6857, lr_0 = 7.2458e-04
Loss = 1.5202e-01, PNorm = 62.9949, GNorm = 0.8615, lr_0 = 7.2409e-04
Loss = 1.7888e-01, PNorm = 63.0176, GNorm = 1.1767, lr_0 = 7.2359e-04
Loss = 1.8164e-01, PNorm = 63.0453, GNorm = 0.6353, lr_0 = 7.2310e-04
Loss = 1.4401e-01, PNorm = 63.0664, GNorm = 0.8521, lr_0 = 7.2260e-04
Loss = 1.6613e-01, PNorm = 63.0936, GNorm = 2.2182, lr_0 = 7.2211e-04
Loss = 1.5887e-01, PNorm = 63.1187, GNorm = 2.0438, lr_0 = 7.2161e-04
Loss = 1.5165e-01, PNorm = 63.1464, GNorm = 1.6746, lr_0 = 7.2112e-04
Loss = 1.7576e-01, PNorm = 63.1700, GNorm = 1.1921, lr_0 = 7.2062e-04
Loss = 1.7953e-01, PNorm = 63.1909, GNorm = 0.7146, lr_0 = 7.2013e-04
Loss = 1.6399e-01, PNorm = 63.2167, GNorm = 0.6952, lr_0 = 7.1964e-04
Validation mae = 0.250346
Epoch 6
Loss = 1.7484e-01, PNorm = 63.2422, GNorm = 1.5121, lr_0 = 7.1914e-04
Loss = 1.8476e-01, PNorm = 63.2663, GNorm = 0.7367, lr_0 = 7.1865e-04
Loss = 1.7361e-01, PNorm = 63.2931, GNorm = 2.0429, lr_0 = 7.1816e-04
Loss = 1.5133e-01, PNorm = 63.3139, GNorm = 1.3833, lr_0 = 7.1767e-04
Loss = 1.6684e-01, PNorm = 63.3326, GNorm = 0.7729, lr_0 = 7.1717e-04
Loss = 1.5937e-01, PNorm = 63.3515, GNorm = 0.8559, lr_0 = 7.1668e-04
Loss = 1.4757e-01, PNorm = 63.3703, GNorm = 0.8975, lr_0 = 7.1619e-04
Loss = 1.7298e-01, PNorm = 63.3901, GNorm = 1.1105, lr_0 = 7.1570e-04
Loss = 1.3949e-01, PNorm = 63.4141, GNorm = 0.6531, lr_0 = 7.1521e-04
Loss = 1.6894e-01, PNorm = 63.4339, GNorm = 0.9833, lr_0 = 7.1472e-04
Loss = 1.6078e-01, PNorm = 63.4597, GNorm = 1.0544, lr_0 = 7.1423e-04
Loss = 1.7303e-01, PNorm = 63.4839, GNorm = 0.6203, lr_0 = 7.1374e-04
Loss = 1.3307e-01, PNorm = 63.5059, GNorm = 0.7338, lr_0 = 7.1325e-04
Loss = 1.7457e-01, PNorm = 63.5309, GNorm = 1.5135, lr_0 = 7.1277e-04
Loss = 1.6405e-01, PNorm = 63.5532, GNorm = 0.7389, lr_0 = 7.1228e-04
Loss = 1.3479e-01, PNorm = 63.5774, GNorm = 0.5033, lr_0 = 7.1179e-04
Loss = 1.6182e-01, PNorm = 63.5971, GNorm = 0.8624, lr_0 = 7.1130e-04
Loss = 1.6244e-01, PNorm = 63.6203, GNorm = 1.1261, lr_0 = 7.1081e-04
Loss = 1.4445e-01, PNorm = 63.6386, GNorm = 1.1201, lr_0 = 7.1033e-04
Loss = 1.6843e-01, PNorm = 63.6589, GNorm = 1.0277, lr_0 = 7.0984e-04
Loss = 1.8493e-01, PNorm = 63.6805, GNorm = 0.8574, lr_0 = 7.0935e-04
Loss = 1.5647e-01, PNorm = 63.7075, GNorm = 1.4257, lr_0 = 7.0887e-04
Loss = 1.8349e-01, PNorm = 63.7389, GNorm = 1.8134, lr_0 = 7.0838e-04
Loss = 1.5745e-01, PNorm = 63.7782, GNorm = 1.2864, lr_0 = 7.0790e-04
Loss = 1.7769e-01, PNorm = 63.8023, GNorm = 1.9990, lr_0 = 7.0741e-04
Loss = 1.6662e-01, PNorm = 63.8354, GNorm = 0.9955, lr_0 = 7.0693e-04
Loss = 1.6268e-01, PNorm = 63.8570, GNorm = 0.9776, lr_0 = 7.0644e-04
Loss = 1.4917e-01, PNorm = 63.8833, GNorm = 0.5889, lr_0 = 7.0596e-04
Loss = 1.4338e-01, PNorm = 63.9005, GNorm = 0.7568, lr_0 = 7.0548e-04
Loss = 1.6573e-01, PNorm = 63.9251, GNorm = 1.8151, lr_0 = 7.0499e-04
Loss = 1.6240e-01, PNorm = 63.9385, GNorm = 0.5259, lr_0 = 7.0451e-04
Loss = 1.6877e-01, PNorm = 63.9619, GNorm = 0.7664, lr_0 = 7.0403e-04
Loss = 1.4945e-01, PNorm = 63.9775, GNorm = 0.6455, lr_0 = 7.0354e-04
Loss = 1.5839e-01, PNorm = 63.9935, GNorm = 1.1432, lr_0 = 7.0306e-04
Loss = 1.7234e-01, PNorm = 64.0144, GNorm = 0.6091, lr_0 = 7.0258e-04
Loss = 1.6907e-01, PNorm = 64.0335, GNorm = 0.6740, lr_0 = 7.0210e-04
Loss = 1.5419e-01, PNorm = 64.0526, GNorm = 1.2843, lr_0 = 7.0162e-04
Loss = 1.5961e-01, PNorm = 64.0702, GNorm = 1.1641, lr_0 = 7.0114e-04
Loss = 1.4305e-01, PNorm = 64.0902, GNorm = 0.8433, lr_0 = 7.0066e-04
Loss = 1.5408e-01, PNorm = 64.1131, GNorm = 0.8789, lr_0 = 7.0018e-04
Loss = 1.7326e-01, PNorm = 64.1322, GNorm = 1.0709, lr_0 = 6.9970e-04
Loss = 1.8463e-01, PNorm = 64.1560, GNorm = 0.8197, lr_0 = 6.9922e-04
Loss = 1.7608e-01, PNorm = 64.1802, GNorm = 0.6104, lr_0 = 6.9874e-04
Loss = 1.6710e-01, PNorm = 64.2086, GNorm = 0.6404, lr_0 = 6.9826e-04
Loss = 1.8010e-01, PNorm = 64.2312, GNorm = 0.8667, lr_0 = 6.9778e-04
Loss = 1.4521e-01, PNorm = 64.2593, GNorm = 1.0344, lr_0 = 6.9730e-04
Loss = 1.5823e-01, PNorm = 64.2798, GNorm = 1.1038, lr_0 = 6.9683e-04
Loss = 1.5408e-01, PNorm = 64.2967, GNorm = 0.7532, lr_0 = 6.9635e-04
Loss = 1.8794e-01, PNorm = 64.3116, GNorm = 0.9876, lr_0 = 6.9587e-04
Loss = 1.6420e-01, PNorm = 64.3340, GNorm = 0.9697, lr_0 = 6.9540e-04
Loss = 1.6371e-01, PNorm = 64.3557, GNorm = 0.5392, lr_0 = 6.9492e-04
Loss = 1.5976e-01, PNorm = 64.3767, GNorm = 1.0891, lr_0 = 6.9444e-04
Loss = 1.6617e-01, PNorm = 64.4036, GNorm = 0.5394, lr_0 = 6.9397e-04
Loss = 1.6048e-01, PNorm = 64.4249, GNorm = 0.7868, lr_0 = 6.9349e-04
Loss = 1.8289e-01, PNorm = 64.4504, GNorm = 0.9390, lr_0 = 6.9302e-04
Loss = 1.5381e-01, PNorm = 64.4760, GNorm = 0.9109, lr_0 = 6.9254e-04
Loss = 1.7006e-01, PNorm = 64.4993, GNorm = 0.9874, lr_0 = 6.9207e-04
Loss = 1.8862e-01, PNorm = 64.5217, GNorm = 1.5823, lr_0 = 6.9159e-04
Loss = 1.8421e-01, PNorm = 64.5494, GNorm = 0.5751, lr_0 = 6.9112e-04
Loss = 1.6556e-01, PNorm = 64.5723, GNorm = 0.8211, lr_0 = 6.9065e-04
Loss = 1.5762e-01, PNorm = 64.5874, GNorm = 1.5553, lr_0 = 6.9017e-04
Loss = 1.7062e-01, PNorm = 64.6090, GNorm = 1.1698, lr_0 = 6.8970e-04
Loss = 1.6703e-01, PNorm = 64.6346, GNorm = 1.4162, lr_0 = 6.8923e-04
Loss = 1.7892e-01, PNorm = 64.6617, GNorm = 1.4572, lr_0 = 6.8876e-04
Loss = 1.8992e-01, PNorm = 64.6781, GNorm = 0.6947, lr_0 = 6.8828e-04
Loss = 1.6325e-01, PNorm = 64.6945, GNorm = 1.6185, lr_0 = 6.8781e-04
Loss = 1.8979e-01, PNorm = 64.7172, GNorm = 1.3210, lr_0 = 6.8734e-04
Loss = 1.7108e-01, PNorm = 64.7481, GNorm = 1.2221, lr_0 = 6.8687e-04
Loss = 1.4789e-01, PNorm = 64.7695, GNorm = 1.4703, lr_0 = 6.8640e-04
Loss = 1.6405e-01, PNorm = 64.7848, GNorm = 1.5297, lr_0 = 6.8593e-04
Loss = 1.8195e-01, PNorm = 64.8064, GNorm = 1.4168, lr_0 = 6.8546e-04
Loss = 1.6669e-01, PNorm = 64.8260, GNorm = 1.1048, lr_0 = 6.8499e-04
Loss = 1.8808e-01, PNorm = 64.8530, GNorm = 1.1088, lr_0 = 6.8452e-04
Loss = 1.6071e-01, PNorm = 64.8762, GNorm = 0.8259, lr_0 = 6.8405e-04
Loss = 1.5582e-01, PNorm = 64.9028, GNorm = 0.7115, lr_0 = 6.8358e-04
Loss = 1.5194e-01, PNorm = 64.9290, GNorm = 0.6285, lr_0 = 6.8312e-04
Loss = 1.7355e-01, PNorm = 64.9532, GNorm = 1.2449, lr_0 = 6.8265e-04
Loss = 1.7536e-01, PNorm = 64.9750, GNorm = 1.0612, lr_0 = 6.8218e-04
Loss = 1.9039e-01, PNorm = 64.9951, GNorm = 0.7912, lr_0 = 6.8171e-04
Loss = 1.5992e-01, PNorm = 65.0095, GNorm = 0.8799, lr_0 = 6.8125e-04
Loss = 1.5239e-01, PNorm = 65.0248, GNorm = 1.0224, lr_0 = 6.8078e-04
Loss = 1.8210e-01, PNorm = 65.0485, GNorm = 0.7619, lr_0 = 6.8031e-04
Loss = 1.6139e-01, PNorm = 65.0685, GNorm = 0.8912, lr_0 = 6.7985e-04
Loss = 1.6490e-01, PNorm = 65.0873, GNorm = 0.7296, lr_0 = 6.7938e-04
Loss = 1.7027e-01, PNorm = 65.1140, GNorm = 0.7908, lr_0 = 6.7892e-04
Loss = 1.5849e-01, PNorm = 65.1310, GNorm = 1.1043, lr_0 = 6.7845e-04
Loss = 1.6869e-01, PNorm = 65.1488, GNorm = 0.5341, lr_0 = 6.7799e-04
Loss = 1.6119e-01, PNorm = 65.1721, GNorm = 0.7755, lr_0 = 6.7752e-04
Loss = 1.4552e-01, PNorm = 65.1898, GNorm = 1.1548, lr_0 = 6.7706e-04
Loss = 1.4775e-01, PNorm = 65.2150, GNorm = 1.0026, lr_0 = 6.7659e-04
Loss = 1.6816e-01, PNorm = 65.2351, GNorm = 0.8031, lr_0 = 6.7613e-04
Loss = 1.7489e-01, PNorm = 65.2556, GNorm = 0.9864, lr_0 = 6.7567e-04
Loss = 1.6401e-01, PNorm = 65.2775, GNorm = 1.1876, lr_0 = 6.7520e-04
Loss = 1.7651e-01, PNorm = 65.3055, GNorm = 0.7901, lr_0 = 6.7474e-04
Loss = 1.5691e-01, PNorm = 65.3318, GNorm = 0.6743, lr_0 = 6.7428e-04
Loss = 1.7089e-01, PNorm = 65.3551, GNorm = 0.9386, lr_0 = 6.7382e-04
Loss = 1.5218e-01, PNorm = 65.3699, GNorm = 0.6308, lr_0 = 6.7335e-04
Loss = 1.6975e-01, PNorm = 65.3824, GNorm = 1.5270, lr_0 = 6.7289e-04
Loss = 1.6128e-01, PNorm = 65.3977, GNorm = 1.2140, lr_0 = 6.7243e-04
Loss = 1.5011e-01, PNorm = 65.4082, GNorm = 1.5029, lr_0 = 6.7197e-04
Loss = 1.8371e-01, PNorm = 65.4325, GNorm = 0.7390, lr_0 = 6.7151e-04
Loss = 1.5379e-01, PNorm = 65.4488, GNorm = 0.9112, lr_0 = 6.7105e-04
Loss = 1.7102e-01, PNorm = 65.4691, GNorm = 0.8456, lr_0 = 6.7059e-04
Loss = 1.6513e-01, PNorm = 65.5014, GNorm = 0.7847, lr_0 = 6.7013e-04
Loss = 1.7492e-01, PNorm = 65.5298, GNorm = 1.8697, lr_0 = 6.6967e-04
Loss = 1.7634e-01, PNorm = 65.5572, GNorm = 1.0779, lr_0 = 6.6921e-04
Loss = 1.7728e-01, PNorm = 65.5850, GNorm = 0.8378, lr_0 = 6.6876e-04
Loss = 1.6051e-01, PNorm = 65.6093, GNorm = 1.0195, lr_0 = 6.6830e-04
Loss = 1.6238e-01, PNorm = 65.6225, GNorm = 1.7174, lr_0 = 6.6784e-04
Loss = 1.9249e-01, PNorm = 65.6477, GNorm = 1.5055, lr_0 = 6.6738e-04
Loss = 1.8260e-01, PNorm = 65.6782, GNorm = 0.7486, lr_0 = 6.6693e-04
Loss = 1.6700e-01, PNorm = 65.7107, GNorm = 0.8421, lr_0 = 6.6647e-04
Loss = 1.7307e-01, PNorm = 65.7321, GNorm = 0.9921, lr_0 = 6.6601e-04
Loss = 1.4600e-01, PNorm = 65.7548, GNorm = 0.7316, lr_0 = 6.6556e-04
Loss = 1.6522e-01, PNorm = 65.7681, GNorm = 0.6578, lr_0 = 6.6510e-04
Loss = 1.8313e-01, PNorm = 65.7828, GNorm = 0.7240, lr_0 = 6.6464e-04
Loss = 1.6664e-01, PNorm = 65.8022, GNorm = 0.8220, lr_0 = 6.6419e-04
Loss = 1.3982e-01, PNorm = 65.8232, GNorm = 0.8682, lr_0 = 6.6373e-04
Loss = 1.6347e-01, PNorm = 65.8359, GNorm = 0.8088, lr_0 = 6.6328e-04
Loss = 1.6955e-01, PNorm = 65.8612, GNorm = 0.6845, lr_0 = 6.6282e-04
Validation mae = 0.258393
Epoch 7
Loss = 1.7570e-01, PNorm = 65.8782, GNorm = 0.7882, lr_0 = 6.6237e-04
Loss = 1.2989e-01, PNorm = 65.9025, GNorm = 0.7605, lr_0 = 6.6192e-04
Loss = 1.5197e-01, PNorm = 65.9147, GNorm = 0.7508, lr_0 = 6.6146e-04
Loss = 1.3367e-01, PNorm = 65.9364, GNorm = 0.9412, lr_0 = 6.6101e-04
Loss = 1.4232e-01, PNorm = 65.9519, GNorm = 1.6447, lr_0 = 6.6056e-04
Loss = 1.5253e-01, PNorm = 65.9668, GNorm = 0.9859, lr_0 = 6.6011e-04
Loss = 1.5746e-01, PNorm = 65.9949, GNorm = 1.3655, lr_0 = 6.5965e-04
Loss = 1.6487e-01, PNorm = 66.0188, GNorm = 1.3529, lr_0 = 6.5920e-04
Loss = 1.4471e-01, PNorm = 66.0430, GNorm = 0.6221, lr_0 = 6.5875e-04
Loss = 1.6593e-01, PNorm = 66.0619, GNorm = 1.4469, lr_0 = 6.5830e-04
Loss = 1.6395e-01, PNorm = 66.0977, GNorm = 0.5923, lr_0 = 6.5785e-04
Loss = 1.7066e-01, PNorm = 66.1303, GNorm = 1.4747, lr_0 = 6.5740e-04
Loss = 1.4448e-01, PNorm = 66.1625, GNorm = 0.6229, lr_0 = 6.5695e-04
Loss = 1.4495e-01, PNorm = 66.1860, GNorm = 0.6788, lr_0 = 6.5650e-04
Loss = 1.4976e-01, PNorm = 66.2076, GNorm = 0.6313, lr_0 = 6.5605e-04
Loss = 1.5657e-01, PNorm = 66.2231, GNorm = 0.7624, lr_0 = 6.5560e-04
Loss = 1.6250e-01, PNorm = 66.2447, GNorm = 0.6022, lr_0 = 6.5515e-04
Loss = 1.5278e-01, PNorm = 66.2609, GNorm = 0.5815, lr_0 = 6.5470e-04
Loss = 1.5988e-01, PNorm = 66.2757, GNorm = 1.4275, lr_0 = 6.5425e-04
Loss = 1.6458e-01, PNorm = 66.2905, GNorm = 0.9276, lr_0 = 6.5380e-04
Loss = 1.6260e-01, PNorm = 66.3087, GNorm = 1.1055, lr_0 = 6.5335e-04
Loss = 1.6281e-01, PNorm = 66.3240, GNorm = 1.1387, lr_0 = 6.5291e-04
Loss = 1.5228e-01, PNorm = 66.3475, GNorm = 1.0102, lr_0 = 6.5246e-04
Loss = 1.4441e-01, PNorm = 66.3732, GNorm = 0.8542, lr_0 = 6.5201e-04
Loss = 1.5556e-01, PNorm = 66.3992, GNorm = 0.4853, lr_0 = 6.5157e-04
Loss = 1.5548e-01, PNorm = 66.4129, GNorm = 0.7539, lr_0 = 6.5112e-04
Loss = 1.3929e-01, PNorm = 66.4297, GNorm = 0.7010, lr_0 = 6.5067e-04
Loss = 1.6793e-01, PNorm = 66.4436, GNorm = 0.8847, lr_0 = 6.5023e-04
Loss = 1.6476e-01, PNorm = 66.4646, GNorm = 1.4144, lr_0 = 6.4978e-04
Loss = 1.3450e-01, PNorm = 66.4833, GNorm = 0.5938, lr_0 = 6.4934e-04
Loss = 1.7112e-01, PNorm = 66.5055, GNorm = 1.6417, lr_0 = 6.4889e-04
Loss = 1.5454e-01, PNorm = 66.5244, GNorm = 1.3131, lr_0 = 6.4845e-04
Loss = 1.5254e-01, PNorm = 66.5442, GNorm = 1.4163, lr_0 = 6.4800e-04
Loss = 1.3959e-01, PNorm = 66.5624, GNorm = 0.9224, lr_0 = 6.4756e-04
Loss = 1.5747e-01, PNorm = 66.5855, GNorm = 0.6783, lr_0 = 6.4712e-04
Loss = 1.4841e-01, PNorm = 66.6046, GNorm = 1.6599, lr_0 = 6.4667e-04
Loss = 1.7063e-01, PNorm = 66.6297, GNorm = 1.0239, lr_0 = 6.4623e-04
Loss = 1.6882e-01, PNorm = 66.6613, GNorm = 0.9000, lr_0 = 6.4579e-04
Loss = 1.4706e-01, PNorm = 66.6901, GNorm = 0.5903, lr_0 = 6.4534e-04
Loss = 1.5641e-01, PNorm = 66.7144, GNorm = 1.2528, lr_0 = 6.4490e-04
Loss = 1.4821e-01, PNorm = 66.7339, GNorm = 0.7036, lr_0 = 6.4446e-04
Loss = 1.4445e-01, PNorm = 66.7459, GNorm = 0.8937, lr_0 = 6.4402e-04
Loss = 1.7007e-01, PNorm = 66.7656, GNorm = 0.8734, lr_0 = 6.4358e-04
Loss = 1.4669e-01, PNorm = 66.7854, GNorm = 0.7965, lr_0 = 6.4314e-04
Loss = 1.5184e-01, PNorm = 66.8115, GNorm = 1.1729, lr_0 = 6.4270e-04
Loss = 1.6152e-01, PNorm = 66.8256, GNorm = 0.6804, lr_0 = 6.4226e-04
Loss = 1.5787e-01, PNorm = 66.8457, GNorm = 0.9495, lr_0 = 6.4182e-04
Loss = 1.4997e-01, PNorm = 66.8647, GNorm = 0.9767, lr_0 = 6.4138e-04
Loss = 1.7274e-01, PNorm = 66.8903, GNorm = 0.8226, lr_0 = 6.4094e-04
Loss = 1.6750e-01, PNorm = 66.9108, GNorm = 1.2352, lr_0 = 6.4050e-04
Loss = 1.6195e-01, PNorm = 66.9327, GNorm = 1.1089, lr_0 = 6.4006e-04
Loss = 1.4285e-01, PNorm = 66.9566, GNorm = 0.6329, lr_0 = 6.3962e-04
Loss = 1.4527e-01, PNorm = 66.9748, GNorm = 0.8262, lr_0 = 6.3918e-04
Loss = 1.5707e-01, PNorm = 66.9934, GNorm = 0.7749, lr_0 = 6.3874e-04
Loss = 1.7270e-01, PNorm = 67.0113, GNorm = 0.7271, lr_0 = 6.3831e-04
Loss = 1.6632e-01, PNorm = 67.0325, GNorm = 0.8119, lr_0 = 6.3787e-04
Loss = 1.5301e-01, PNorm = 67.0504, GNorm = 0.9714, lr_0 = 6.3743e-04
Loss = 1.5526e-01, PNorm = 67.0743, GNorm = 0.7029, lr_0 = 6.3700e-04
Loss = 1.5066e-01, PNorm = 67.1005, GNorm = 1.2501, lr_0 = 6.3656e-04
Loss = 1.5248e-01, PNorm = 67.1264, GNorm = 1.0002, lr_0 = 6.3612e-04
Loss = 1.6326e-01, PNorm = 67.1573, GNorm = 0.9954, lr_0 = 6.3569e-04
Loss = 1.6566e-01, PNorm = 67.1801, GNorm = 0.9159, lr_0 = 6.3525e-04
Loss = 1.4463e-01, PNorm = 67.2013, GNorm = 0.6165, lr_0 = 6.3482e-04
Loss = 1.6863e-01, PNorm = 67.2245, GNorm = 0.5906, lr_0 = 6.3438e-04
Loss = 1.3487e-01, PNorm = 67.2451, GNorm = 0.5599, lr_0 = 6.3395e-04
Loss = 1.5023e-01, PNorm = 67.2599, GNorm = 0.6071, lr_0 = 6.3351e-04
Loss = 1.5508e-01, PNorm = 67.2800, GNorm = 0.7331, lr_0 = 6.3308e-04
Loss = 1.7944e-01, PNorm = 67.2956, GNorm = 0.6897, lr_0 = 6.3265e-04
Loss = 1.7691e-01, PNorm = 67.3127, GNorm = 0.9347, lr_0 = 6.3221e-04
Loss = 1.5245e-01, PNorm = 67.3231, GNorm = 0.5382, lr_0 = 6.3178e-04
Loss = 1.5731e-01, PNorm = 67.3488, GNorm = 0.6686, lr_0 = 6.3135e-04
Loss = 1.6836e-01, PNorm = 67.3684, GNorm = 0.7330, lr_0 = 6.3091e-04
Loss = 1.5063e-01, PNorm = 67.3944, GNorm = 1.3122, lr_0 = 6.3048e-04
Loss = 1.5375e-01, PNorm = 67.4120, GNorm = 0.6148, lr_0 = 6.3005e-04
Loss = 1.4007e-01, PNorm = 67.4303, GNorm = 0.6955, lr_0 = 6.2962e-04
Loss = 1.3994e-01, PNorm = 67.4469, GNorm = 0.6535, lr_0 = 6.2919e-04
Loss = 1.6164e-01, PNorm = 67.4624, GNorm = 1.2287, lr_0 = 6.2876e-04
Loss = 1.8153e-01, PNorm = 67.4791, GNorm = 0.9075, lr_0 = 6.2833e-04
Loss = 1.4682e-01, PNorm = 67.5046, GNorm = 1.1470, lr_0 = 6.2789e-04
Loss = 1.5696e-01, PNorm = 67.5188, GNorm = 1.0292, lr_0 = 6.2746e-04
Loss = 1.4656e-01, PNorm = 67.5454, GNorm = 0.6584, lr_0 = 6.2703e-04
Loss = 1.6231e-01, PNorm = 67.5648, GNorm = 0.9976, lr_0 = 6.2661e-04
Loss = 1.4892e-01, PNorm = 67.5956, GNorm = 0.8956, lr_0 = 6.2618e-04
Loss = 1.4134e-01, PNorm = 67.6189, GNorm = 0.8386, lr_0 = 6.2575e-04
Loss = 1.6654e-01, PNorm = 67.6395, GNorm = 0.6897, lr_0 = 6.2532e-04
Loss = 1.3533e-01, PNorm = 67.6548, GNorm = 0.7938, lr_0 = 6.2489e-04
Loss = 1.5331e-01, PNorm = 67.6655, GNorm = 0.8755, lr_0 = 6.2446e-04
Loss = 1.6902e-01, PNorm = 67.6823, GNorm = 0.7088, lr_0 = 6.2403e-04
Loss = 1.5078e-01, PNorm = 67.6998, GNorm = 0.7075, lr_0 = 6.2361e-04
Loss = 1.6625e-01, PNorm = 67.7160, GNorm = 1.0930, lr_0 = 6.2318e-04
Loss = 1.3453e-01, PNorm = 67.7343, GNorm = 0.6870, lr_0 = 6.2275e-04
Loss = 1.5485e-01, PNorm = 67.7486, GNorm = 0.7212, lr_0 = 6.2233e-04
Loss = 1.4862e-01, PNorm = 67.7706, GNorm = 0.6059, lr_0 = 6.2190e-04
Loss = 1.6010e-01, PNorm = 67.7853, GNorm = 1.1668, lr_0 = 6.2147e-04
Loss = 1.5763e-01, PNorm = 67.8004, GNorm = 1.2329, lr_0 = 6.2105e-04
Loss = 1.5666e-01, PNorm = 67.8153, GNorm = 0.8762, lr_0 = 6.2062e-04
Loss = 1.6321e-01, PNorm = 67.8331, GNorm = 0.7038, lr_0 = 6.2020e-04
Loss = 1.5163e-01, PNorm = 67.8490, GNorm = 0.9086, lr_0 = 6.1977e-04
Loss = 1.5209e-01, PNorm = 67.8625, GNorm = 0.8918, lr_0 = 6.1935e-04
Loss = 1.5418e-01, PNorm = 67.8855, GNorm = 0.9287, lr_0 = 6.1892e-04
Loss = 1.4309e-01, PNorm = 67.9065, GNorm = 0.7813, lr_0 = 6.1850e-04
Loss = 1.5272e-01, PNorm = 67.9218, GNorm = 0.6461, lr_0 = 6.1808e-04
Loss = 1.6240e-01, PNorm = 67.9375, GNorm = 0.8464, lr_0 = 6.1765e-04
Loss = 1.5187e-01, PNorm = 67.9548, GNorm = 0.5628, lr_0 = 6.1723e-04
Loss = 1.4221e-01, PNorm = 67.9716, GNorm = 0.6406, lr_0 = 6.1681e-04
Loss = 1.6651e-01, PNorm = 67.9918, GNorm = 0.7754, lr_0 = 6.1638e-04
Loss = 1.4985e-01, PNorm = 68.0128, GNorm = 0.7156, lr_0 = 6.1596e-04
Loss = 1.5292e-01, PNorm = 68.0365, GNorm = 1.3746, lr_0 = 6.1554e-04
Loss = 1.5966e-01, PNorm = 68.0560, GNorm = 0.5877, lr_0 = 6.1512e-04
Loss = 1.6619e-01, PNorm = 68.0776, GNorm = 0.6122, lr_0 = 6.1470e-04
Loss = 1.6430e-01, PNorm = 68.0947, GNorm = 0.7030, lr_0 = 6.1428e-04
Loss = 1.7931e-01, PNorm = 68.1141, GNorm = 1.0626, lr_0 = 6.1385e-04
Loss = 1.6088e-01, PNorm = 68.1321, GNorm = 1.3876, lr_0 = 6.1343e-04
Loss = 1.5308e-01, PNorm = 68.1548, GNorm = 0.7568, lr_0 = 6.1301e-04
Loss = 1.3567e-01, PNorm = 68.1768, GNorm = 0.6095, lr_0 = 6.1259e-04
Loss = 1.7031e-01, PNorm = 68.2003, GNorm = 1.9175, lr_0 = 6.1217e-04
Loss = 1.6450e-01, PNorm = 68.2184, GNorm = 0.5754, lr_0 = 6.1175e-04
Loss = 1.4123e-01, PNorm = 68.2442, GNorm = 0.6720, lr_0 = 6.1134e-04
Loss = 1.4782e-01, PNorm = 68.2665, GNorm = 0.8838, lr_0 = 6.1092e-04
Loss = 1.4109e-01, PNorm = 68.2827, GNorm = 0.7903, lr_0 = 6.1050e-04
Validation mae = 0.245644
Epoch 8
Loss = 1.4397e-01, PNorm = 68.3123, GNorm = 1.3872, lr_0 = 6.1008e-04
Loss = 1.2292e-01, PNorm = 68.3329, GNorm = 0.6209, lr_0 = 6.0966e-04
Loss = 1.4788e-01, PNorm = 68.3477, GNorm = 0.6502, lr_0 = 6.0924e-04
Loss = 1.4056e-01, PNorm = 68.3599, GNorm = 0.5243, lr_0 = 6.0883e-04
Loss = 1.4426e-01, PNorm = 68.3815, GNorm = 0.6343, lr_0 = 6.0841e-04
Loss = 1.3390e-01, PNorm = 68.4056, GNorm = 1.0738, lr_0 = 6.0799e-04
Loss = 1.4330e-01, PNorm = 68.4246, GNorm = 0.7948, lr_0 = 6.0758e-04
Loss = 1.3029e-01, PNorm = 68.4443, GNorm = 0.6527, lr_0 = 6.0716e-04
Loss = 1.2028e-01, PNorm = 68.4583, GNorm = 1.0309, lr_0 = 6.0674e-04
Loss = 1.6141e-01, PNorm = 68.4714, GNorm = 0.6993, lr_0 = 6.0633e-04
Loss = 1.4383e-01, PNorm = 68.4867, GNorm = 0.6756, lr_0 = 6.0591e-04
Loss = 1.5842e-01, PNorm = 68.5135, GNorm = 1.0798, lr_0 = 6.0550e-04
Loss = 1.4421e-01, PNorm = 68.5308, GNorm = 0.8415, lr_0 = 6.0508e-04
Loss = 1.3462e-01, PNorm = 68.5501, GNorm = 1.3733, lr_0 = 6.0467e-04
Loss = 1.6357e-01, PNorm = 68.5696, GNorm = 0.8076, lr_0 = 6.0425e-04
Loss = 1.4189e-01, PNorm = 68.5906, GNorm = 1.4094, lr_0 = 6.0384e-04
Loss = 1.5645e-01, PNorm = 68.6064, GNorm = 0.8146, lr_0 = 6.0343e-04
Loss = 1.4139e-01, PNorm = 68.6332, GNorm = 1.0930, lr_0 = 6.0301e-04
Loss = 1.6972e-01, PNorm = 68.6540, GNorm = 0.8399, lr_0 = 6.0260e-04
Loss = 1.5337e-01, PNorm = 68.6759, GNorm = 1.2464, lr_0 = 6.0219e-04
Loss = 1.6662e-01, PNorm = 68.6990, GNorm = 0.6123, lr_0 = 6.0178e-04
Loss = 1.4703e-01, PNorm = 68.7210, GNorm = 0.7024, lr_0 = 6.0136e-04
Loss = 1.4769e-01, PNorm = 68.7478, GNorm = 0.6935, lr_0 = 6.0095e-04
Loss = 1.6350e-01, PNorm = 68.7701, GNorm = 0.5663, lr_0 = 6.0054e-04
Loss = 1.5457e-01, PNorm = 68.7890, GNorm = 0.5833, lr_0 = 6.0013e-04
Loss = 1.5261e-01, PNorm = 68.8110, GNorm = 0.7033, lr_0 = 5.9972e-04
Loss = 1.6562e-01, PNorm = 68.8350, GNorm = 0.8613, lr_0 = 5.9931e-04
Loss = 1.4667e-01, PNorm = 68.8581, GNorm = 0.5322, lr_0 = 5.9890e-04
Loss = 1.8833e-01, PNorm = 68.8727, GNorm = 0.8396, lr_0 = 5.9849e-04
Loss = 1.5809e-01, PNorm = 68.8903, GNorm = 0.5644, lr_0 = 5.9808e-04
Loss = 1.4609e-01, PNorm = 68.9174, GNorm = 1.3306, lr_0 = 5.9767e-04
Loss = 1.5565e-01, PNorm = 68.9394, GNorm = 0.9814, lr_0 = 5.9726e-04
Loss = 1.3549e-01, PNorm = 68.9540, GNorm = 0.5316, lr_0 = 5.9685e-04
Loss = 1.5214e-01, PNorm = 68.9755, GNorm = 0.7314, lr_0 = 5.9644e-04
Loss = 1.5177e-01, PNorm = 68.9949, GNorm = 1.1772, lr_0 = 5.9603e-04
Loss = 1.7223e-01, PNorm = 69.0172, GNorm = 0.7289, lr_0 = 5.9562e-04
Loss = 1.6296e-01, PNorm = 69.0380, GNorm = 0.5849, lr_0 = 5.9521e-04
Loss = 1.5221e-01, PNorm = 69.0578, GNorm = 0.8183, lr_0 = 5.9481e-04
Loss = 1.6265e-01, PNorm = 69.0777, GNorm = 0.9498, lr_0 = 5.9440e-04
Loss = 1.4913e-01, PNorm = 69.0904, GNorm = 0.6642, lr_0 = 5.9399e-04
Loss = 1.4138e-01, PNorm = 69.1111, GNorm = 0.4799, lr_0 = 5.9358e-04
Loss = 1.4229e-01, PNorm = 69.1251, GNorm = 0.6289, lr_0 = 5.9318e-04
Loss = 1.4762e-01, PNorm = 69.1425, GNorm = 1.0953, lr_0 = 5.9277e-04
Loss = 1.3671e-01, PNorm = 69.1578, GNorm = 0.6341, lr_0 = 5.9236e-04
Loss = 1.5226e-01, PNorm = 69.1771, GNorm = 0.8252, lr_0 = 5.9196e-04
Loss = 1.3539e-01, PNorm = 69.1979, GNorm = 0.9687, lr_0 = 5.9155e-04
Loss = 1.6648e-01, PNorm = 69.2172, GNorm = 0.7954, lr_0 = 5.9115e-04
Loss = 1.5842e-01, PNorm = 69.2406, GNorm = 0.6751, lr_0 = 5.9074e-04
Loss = 1.5291e-01, PNorm = 69.2677, GNorm = 0.9788, lr_0 = 5.9034e-04
Loss = 1.4990e-01, PNorm = 69.2864, GNorm = 0.8514, lr_0 = 5.8993e-04
Loss = 1.3028e-01, PNorm = 69.3057, GNorm = 0.6167, lr_0 = 5.8953e-04
Loss = 1.2735e-01, PNorm = 69.3273, GNorm = 1.2895, lr_0 = 5.8913e-04
Loss = 1.5428e-01, PNorm = 69.3446, GNorm = 0.8791, lr_0 = 5.8872e-04
Loss = 1.4562e-01, PNorm = 69.3579, GNorm = 0.8872, lr_0 = 5.8832e-04
Loss = 1.3289e-01, PNorm = 69.3755, GNorm = 0.5458, lr_0 = 5.8792e-04
Loss = 1.2923e-01, PNorm = 69.3926, GNorm = 0.7549, lr_0 = 5.8751e-04
Loss = 1.5473e-01, PNorm = 69.4165, GNorm = 0.4764, lr_0 = 5.8711e-04
Loss = 1.4584e-01, PNorm = 69.4364, GNorm = 0.8381, lr_0 = 5.8671e-04
Loss = 1.5387e-01, PNorm = 69.4515, GNorm = 1.7035, lr_0 = 5.8631e-04
Loss = 1.3848e-01, PNorm = 69.4643, GNorm = 0.7268, lr_0 = 5.8591e-04
Loss = 1.4431e-01, PNorm = 69.4810, GNorm = 0.8596, lr_0 = 5.8550e-04
Loss = 1.2569e-01, PNorm = 69.4935, GNorm = 0.9060, lr_0 = 5.8510e-04
Loss = 1.3923e-01, PNorm = 69.5081, GNorm = 1.1034, lr_0 = 5.8470e-04
Loss = 1.4844e-01, PNorm = 69.5337, GNorm = 0.7090, lr_0 = 5.8430e-04
Loss = 1.3013e-01, PNorm = 69.5562, GNorm = 0.6935, lr_0 = 5.8390e-04
Loss = 1.4881e-01, PNorm = 69.5719, GNorm = 1.7425, lr_0 = 5.8350e-04
Loss = 1.4385e-01, PNorm = 69.5880, GNorm = 0.5405, lr_0 = 5.8310e-04
Loss = 1.2480e-01, PNorm = 69.6145, GNorm = 0.6810, lr_0 = 5.8270e-04
Loss = 1.5520e-01, PNorm = 69.6280, GNorm = 0.7093, lr_0 = 5.8230e-04
Loss = 1.4846e-01, PNorm = 69.6456, GNorm = 0.7836, lr_0 = 5.8190e-04
Loss = 1.4225e-01, PNorm = 69.6632, GNorm = 0.7463, lr_0 = 5.8151e-04
Loss = 1.3948e-01, PNorm = 69.6854, GNorm = 1.3301, lr_0 = 5.8111e-04
Loss = 1.4557e-01, PNorm = 69.7016, GNorm = 0.9400, lr_0 = 5.8071e-04
Loss = 1.4025e-01, PNorm = 69.7189, GNorm = 0.7916, lr_0 = 5.8031e-04
Loss = 1.5572e-01, PNorm = 69.7323, GNorm = 0.6061, lr_0 = 5.7991e-04
Loss = 1.6050e-01, PNorm = 69.7439, GNorm = 1.2923, lr_0 = 5.7952e-04
Loss = 1.6457e-01, PNorm = 69.7638, GNorm = 0.5836, lr_0 = 5.7912e-04
Loss = 1.4455e-01, PNorm = 69.7794, GNorm = 0.9383, lr_0 = 5.7872e-04
Loss = 1.4557e-01, PNorm = 69.7996, GNorm = 0.9039, lr_0 = 5.7833e-04
Loss = 1.4459e-01, PNorm = 69.8127, GNorm = 0.8087, lr_0 = 5.7793e-04
Loss = 1.2729e-01, PNorm = 69.8268, GNorm = 0.8050, lr_0 = 5.7753e-04
Loss = 1.3950e-01, PNorm = 69.8486, GNorm = 0.8509, lr_0 = 5.7714e-04
Loss = 1.4978e-01, PNorm = 69.8639, GNorm = 1.0282, lr_0 = 5.7674e-04
Loss = 1.4999e-01, PNorm = 69.8800, GNorm = 0.9239, lr_0 = 5.7635e-04
Loss = 1.5600e-01, PNorm = 69.8982, GNorm = 0.8206, lr_0 = 5.7595e-04
Loss = 1.5560e-01, PNorm = 69.9199, GNorm = 0.6115, lr_0 = 5.7556e-04
Loss = 1.6557e-01, PNorm = 69.9403, GNorm = 1.1602, lr_0 = 5.7516e-04
Loss = 1.7652e-01, PNorm = 69.9547, GNorm = 0.9005, lr_0 = 5.7477e-04
Loss = 1.5466e-01, PNorm = 69.9730, GNorm = 1.6317, lr_0 = 5.7438e-04
Loss = 1.7017e-01, PNorm = 69.9927, GNorm = 1.2584, lr_0 = 5.7398e-04
Loss = 1.7211e-01, PNorm = 70.0232, GNorm = 1.6611, lr_0 = 5.7359e-04
Loss = 1.5033e-01, PNorm = 70.0522, GNorm = 0.8187, lr_0 = 5.7320e-04
Loss = 1.5563e-01, PNorm = 70.0818, GNorm = 0.6827, lr_0 = 5.7280e-04
Loss = 1.7051e-01, PNorm = 70.1024, GNorm = 1.1042, lr_0 = 5.7241e-04
Loss = 1.5098e-01, PNorm = 70.1204, GNorm = 0.5502, lr_0 = 5.7202e-04
Loss = 1.5489e-01, PNorm = 70.1347, GNorm = 0.6654, lr_0 = 5.7163e-04
Loss = 1.5393e-01, PNorm = 70.1553, GNorm = 1.3954, lr_0 = 5.7124e-04
Loss = 1.6868e-01, PNorm = 70.1753, GNorm = 1.7409, lr_0 = 5.7084e-04
Loss = 1.6115e-01, PNorm = 70.2032, GNorm = 1.5581, lr_0 = 5.7045e-04
Loss = 1.5858e-01, PNorm = 70.2205, GNorm = 1.4273, lr_0 = 5.7006e-04
Loss = 1.5901e-01, PNorm = 70.2391, GNorm = 0.5760, lr_0 = 5.6967e-04
Loss = 1.3680e-01, PNorm = 70.2522, GNorm = 0.7629, lr_0 = 5.6928e-04
Loss = 1.4851e-01, PNorm = 70.2703, GNorm = 1.1426, lr_0 = 5.6889e-04
Loss = 1.5479e-01, PNorm = 70.2976, GNorm = 1.3403, lr_0 = 5.6850e-04
Loss = 1.5040e-01, PNorm = 70.3133, GNorm = 1.2585, lr_0 = 5.6811e-04
Loss = 1.4985e-01, PNorm = 70.3347, GNorm = 0.7580, lr_0 = 5.6772e-04
Loss = 1.3780e-01, PNorm = 70.3479, GNorm = 0.6013, lr_0 = 5.6733e-04
Loss = 1.5212e-01, PNorm = 70.3609, GNorm = 0.5962, lr_0 = 5.6695e-04
Loss = 1.6729e-01, PNorm = 70.3743, GNorm = 0.5417, lr_0 = 5.6656e-04
Loss = 1.4055e-01, PNorm = 70.3957, GNorm = 1.3224, lr_0 = 5.6617e-04
Loss = 1.5309e-01, PNorm = 70.4113, GNorm = 1.4354, lr_0 = 5.6578e-04
Loss = 1.6204e-01, PNorm = 70.4368, GNorm = 0.6751, lr_0 = 5.6539e-04
Loss = 1.4285e-01, PNorm = 70.4549, GNorm = 0.6475, lr_0 = 5.6501e-04
Loss = 1.3021e-01, PNorm = 70.4723, GNorm = 0.8408, lr_0 = 5.6462e-04
Loss = 1.5941e-01, PNorm = 70.4863, GNorm = 0.6554, lr_0 = 5.6423e-04
Loss = 1.5465e-01, PNorm = 70.5089, GNorm = 0.6847, lr_0 = 5.6385e-04
Loss = 1.4063e-01, PNorm = 70.5197, GNorm = 0.6610, lr_0 = 5.6346e-04
Loss = 1.5091e-01, PNorm = 70.5404, GNorm = 1.7958, lr_0 = 5.6307e-04
Loss = 1.7219e-01, PNorm = 70.5549, GNorm = 0.7790, lr_0 = 5.6269e-04
Loss = 1.4757e-01, PNorm = 70.5748, GNorm = 1.2473, lr_0 = 5.6230e-04
Validation mae = 0.252769
Epoch 9
Loss = 1.4755e-01, PNorm = 70.5974, GNorm = 1.3693, lr_0 = 5.6192e-04
Loss = 1.3786e-01, PNorm = 70.6209, GNorm = 0.5114, lr_0 = 5.6153e-04
Loss = 1.4098e-01, PNorm = 70.6351, GNorm = 0.7407, lr_0 = 5.6115e-04
Loss = 1.5575e-01, PNorm = 70.6552, GNorm = 0.6097, lr_0 = 5.6076e-04
Loss = 1.2134e-01, PNorm = 70.6747, GNorm = 0.5995, lr_0 = 5.6038e-04
Loss = 1.3310e-01, PNorm = 70.6965, GNorm = 0.5729, lr_0 = 5.6000e-04
Loss = 1.3066e-01, PNorm = 70.7125, GNorm = 0.7706, lr_0 = 5.5961e-04
Loss = 1.1737e-01, PNorm = 70.7246, GNorm = 0.5544, lr_0 = 5.5923e-04
Loss = 1.2593e-01, PNorm = 70.7379, GNorm = 0.8479, lr_0 = 5.5885e-04
Loss = 1.4425e-01, PNorm = 70.7548, GNorm = 0.6864, lr_0 = 5.5846e-04
Loss = 1.4752e-01, PNorm = 70.7759, GNorm = 0.8467, lr_0 = 5.5808e-04
Loss = 1.2552e-01, PNorm = 70.7950, GNorm = 0.6263, lr_0 = 5.5770e-04
Loss = 1.4858e-01, PNorm = 70.8122, GNorm = 0.7016, lr_0 = 5.5732e-04
Loss = 1.2673e-01, PNorm = 70.8286, GNorm = 0.5899, lr_0 = 5.5693e-04
Loss = 1.1283e-01, PNorm = 70.8488, GNorm = 0.8385, lr_0 = 5.5655e-04
Loss = 1.4075e-01, PNorm = 70.8696, GNorm = 0.5346, lr_0 = 5.5617e-04
Loss = 1.2522e-01, PNorm = 70.8901, GNorm = 0.6774, lr_0 = 5.5579e-04
Loss = 1.2766e-01, PNorm = 70.9016, GNorm = 0.6709, lr_0 = 5.5541e-04
Loss = 1.4373e-01, PNorm = 70.9204, GNorm = 0.9948, lr_0 = 5.5503e-04
Loss = 1.4239e-01, PNorm = 70.9417, GNorm = 0.6697, lr_0 = 5.5465e-04
Loss = 1.5240e-01, PNorm = 70.9633, GNorm = 0.6136, lr_0 = 5.5427e-04
Loss = 1.2936e-01, PNorm = 70.9821, GNorm = 1.0029, lr_0 = 5.5389e-04
Loss = 1.3438e-01, PNorm = 70.9952, GNorm = 0.6781, lr_0 = 5.5351e-04
Loss = 1.2995e-01, PNorm = 71.0134, GNorm = 0.7843, lr_0 = 5.5313e-04
Loss = 1.3695e-01, PNorm = 71.0268, GNorm = 0.7344, lr_0 = 5.5275e-04
Loss = 1.1735e-01, PNorm = 71.0367, GNorm = 1.1593, lr_0 = 5.5237e-04
Loss = 1.2482e-01, PNorm = 71.0419, GNorm = 0.5472, lr_0 = 5.5199e-04
Loss = 1.1729e-01, PNorm = 71.0576, GNorm = 0.6075, lr_0 = 5.5162e-04
Loss = 1.5097e-01, PNorm = 71.0746, GNorm = 0.5576, lr_0 = 5.5124e-04
Loss = 1.6526e-01, PNorm = 71.0946, GNorm = 0.8018, lr_0 = 5.5086e-04
Loss = 1.4427e-01, PNorm = 71.1102, GNorm = 1.2851, lr_0 = 5.5048e-04
Loss = 1.2174e-01, PNorm = 71.1256, GNorm = 0.6002, lr_0 = 5.5011e-04
Loss = 1.4148e-01, PNorm = 71.1418, GNorm = 0.6019, lr_0 = 5.4973e-04
Loss = 1.4161e-01, PNorm = 71.1620, GNorm = 1.0791, lr_0 = 5.4935e-04
Loss = 1.4299e-01, PNorm = 71.1813, GNorm = 0.5094, lr_0 = 5.4898e-04
Loss = 1.5104e-01, PNorm = 71.1984, GNorm = 0.8479, lr_0 = 5.4860e-04
Loss = 1.4552e-01, PNorm = 71.2171, GNorm = 0.8524, lr_0 = 5.4822e-04
Loss = 1.4934e-01, PNorm = 71.2348, GNorm = 1.2313, lr_0 = 5.4785e-04
Loss = 1.4114e-01, PNorm = 71.2518, GNorm = 0.5034, lr_0 = 5.4747e-04
Loss = 1.4178e-01, PNorm = 71.2656, GNorm = 0.6219, lr_0 = 5.4710e-04
Loss = 1.4222e-01, PNorm = 71.2863, GNorm = 1.0825, lr_0 = 5.4672e-04
Loss = 1.3749e-01, PNorm = 71.3074, GNorm = 0.4762, lr_0 = 5.4635e-04
Loss = 1.3775e-01, PNorm = 71.3284, GNorm = 1.2321, lr_0 = 5.4597e-04
Loss = 1.5016e-01, PNorm = 71.3462, GNorm = 0.5501, lr_0 = 5.4560e-04
Loss = 1.3931e-01, PNorm = 71.3572, GNorm = 0.7133, lr_0 = 5.4523e-04
Loss = 1.4524e-01, PNorm = 71.3635, GNorm = 0.6240, lr_0 = 5.4485e-04
Loss = 1.4075e-01, PNorm = 71.3765, GNorm = 0.6990, lr_0 = 5.4448e-04
Loss = 1.3315e-01, PNorm = 71.3890, GNorm = 0.6423, lr_0 = 5.4411e-04
Loss = 1.4453e-01, PNorm = 71.4031, GNorm = 0.6331, lr_0 = 5.4373e-04
Loss = 1.2941e-01, PNorm = 71.4244, GNorm = 0.7517, lr_0 = 5.4336e-04
Loss = 1.3401e-01, PNorm = 71.4435, GNorm = 0.6327, lr_0 = 5.4299e-04
Loss = 1.5856e-01, PNorm = 71.4605, GNorm = 0.9092, lr_0 = 5.4262e-04
Loss = 1.6614e-01, PNorm = 71.4774, GNorm = 0.7283, lr_0 = 5.4225e-04
Loss = 1.6437e-01, PNorm = 71.4967, GNorm = 0.6888, lr_0 = 5.4187e-04
Loss = 1.4562e-01, PNorm = 71.5195, GNorm = 1.0016, lr_0 = 5.4150e-04
Loss = 1.4844e-01, PNorm = 71.5343, GNorm = 0.7066, lr_0 = 5.4113e-04
Loss = 1.5103e-01, PNorm = 71.5542, GNorm = 0.6563, lr_0 = 5.4076e-04
Loss = 1.4462e-01, PNorm = 71.5778, GNorm = 0.8357, lr_0 = 5.4039e-04
Loss = 1.4872e-01, PNorm = 71.5963, GNorm = 0.7070, lr_0 = 5.4002e-04
Loss = 1.3633e-01, PNorm = 71.6106, GNorm = 0.8313, lr_0 = 5.3965e-04
Loss = 1.4311e-01, PNorm = 71.6283, GNorm = 0.8860, lr_0 = 5.3928e-04
Loss = 1.8163e-01, PNorm = 71.6448, GNorm = 0.7658, lr_0 = 5.3891e-04
Loss = 1.1962e-01, PNorm = 71.6656, GNorm = 0.7718, lr_0 = 5.3854e-04
Loss = 1.3849e-01, PNorm = 71.6810, GNorm = 1.1330, lr_0 = 5.3817e-04
Loss = 1.3189e-01, PNorm = 71.6968, GNorm = 0.6884, lr_0 = 5.3781e-04
Loss = 1.4993e-01, PNorm = 71.7131, GNorm = 0.6871, lr_0 = 5.3744e-04
Loss = 1.1655e-01, PNorm = 71.7270, GNorm = 0.4414, lr_0 = 5.3707e-04
Loss = 1.3428e-01, PNorm = 71.7484, GNorm = 0.5551, lr_0 = 5.3670e-04
Loss = 1.5154e-01, PNorm = 71.7635, GNorm = 0.7614, lr_0 = 5.3633e-04
Loss = 1.4676e-01, PNorm = 71.7819, GNorm = 0.7138, lr_0 = 5.3597e-04
Loss = 1.3977e-01, PNorm = 71.7953, GNorm = 1.0523, lr_0 = 5.3560e-04
Loss = 1.4965e-01, PNorm = 71.8116, GNorm = 0.9351, lr_0 = 5.3523e-04
Loss = 1.4033e-01, PNorm = 71.8241, GNorm = 0.9805, lr_0 = 5.3486e-04
Loss = 1.3795e-01, PNorm = 71.8365, GNorm = 0.9138, lr_0 = 5.3450e-04
Loss = 1.4064e-01, PNorm = 71.8508, GNorm = 0.6081, lr_0 = 5.3413e-04
Loss = 1.3957e-01, PNorm = 71.8647, GNorm = 0.7368, lr_0 = 5.3377e-04
Loss = 1.3343e-01, PNorm = 71.8769, GNorm = 1.1593, lr_0 = 5.3340e-04
Loss = 1.2438e-01, PNorm = 71.8894, GNorm = 0.6245, lr_0 = 5.3304e-04
Loss = 1.1676e-01, PNorm = 71.9060, GNorm = 0.7975, lr_0 = 5.3267e-04
Loss = 1.4614e-01, PNorm = 71.9200, GNorm = 0.8592, lr_0 = 5.3231e-04
Loss = 1.5359e-01, PNorm = 71.9374, GNorm = 0.6501, lr_0 = 5.3194e-04
Loss = 1.3684e-01, PNorm = 71.9551, GNorm = 1.3777, lr_0 = 5.3158e-04
Loss = 1.3369e-01, PNorm = 71.9663, GNorm = 0.9075, lr_0 = 5.3121e-04
Loss = 1.3158e-01, PNorm = 71.9779, GNorm = 0.7124, lr_0 = 5.3085e-04
Loss = 1.4712e-01, PNorm = 71.9859, GNorm = 0.5906, lr_0 = 5.3048e-04
Loss = 1.3672e-01, PNorm = 72.0022, GNorm = 0.8254, lr_0 = 5.3012e-04
Loss = 1.4309e-01, PNorm = 72.0150, GNorm = 0.8990, lr_0 = 5.2976e-04
Loss = 1.2564e-01, PNorm = 72.0287, GNorm = 0.5180, lr_0 = 5.2939e-04
Loss = 1.3694e-01, PNorm = 72.0423, GNorm = 0.7624, lr_0 = 5.2903e-04
Loss = 1.3703e-01, PNorm = 72.0517, GNorm = 0.8256, lr_0 = 5.2867e-04
Loss = 1.3339e-01, PNorm = 72.0658, GNorm = 1.7270, lr_0 = 5.2831e-04
Loss = 1.7111e-01, PNorm = 72.0829, GNorm = 1.1060, lr_0 = 5.2795e-04
Loss = 1.4687e-01, PNorm = 72.1074, GNorm = 0.6016, lr_0 = 5.2758e-04
Loss = 1.3716e-01, PNorm = 72.1239, GNorm = 1.1201, lr_0 = 5.2722e-04
Loss = 1.5987e-01, PNorm = 72.1447, GNorm = 0.9340, lr_0 = 5.2686e-04
Loss = 1.3328e-01, PNorm = 72.1583, GNorm = 0.7490, lr_0 = 5.2650e-04
Loss = 1.6773e-01, PNorm = 72.1705, GNorm = 0.8392, lr_0 = 5.2614e-04
Loss = 1.4874e-01, PNorm = 72.1899, GNorm = 0.7436, lr_0 = 5.2578e-04
Loss = 1.3045e-01, PNorm = 72.2032, GNorm = 0.5296, lr_0 = 5.2542e-04
Loss = 1.2773e-01, PNorm = 72.2205, GNorm = 0.6182, lr_0 = 5.2506e-04
Loss = 1.2370e-01, PNorm = 72.2346, GNorm = 0.7853, lr_0 = 5.2470e-04
Loss = 1.2531e-01, PNorm = 72.2491, GNorm = 0.6205, lr_0 = 5.2434e-04
Loss = 1.2923e-01, PNorm = 72.2562, GNorm = 0.9132, lr_0 = 5.2398e-04
Loss = 1.3528e-01, PNorm = 72.2711, GNorm = 0.9648, lr_0 = 5.2362e-04
Loss = 1.4496e-01, PNorm = 72.2825, GNorm = 0.5793, lr_0 = 5.2326e-04
Loss = 1.4582e-01, PNorm = 72.2991, GNorm = 1.2485, lr_0 = 5.2290e-04
Loss = 1.4475e-01, PNorm = 72.3144, GNorm = 0.5637, lr_0 = 5.2255e-04
Loss = 1.5573e-01, PNorm = 72.3350, GNorm = 1.2258, lr_0 = 5.2219e-04
Loss = 1.7266e-01, PNorm = 72.3476, GNorm = 1.1100, lr_0 = 5.2183e-04
Loss = 1.4674e-01, PNorm = 72.3660, GNorm = 0.8162, lr_0 = 5.2147e-04
Loss = 1.4054e-01, PNorm = 72.3835, GNorm = 0.8028, lr_0 = 5.2112e-04
Loss = 1.5903e-01, PNorm = 72.4088, GNorm = 0.7447, lr_0 = 5.2076e-04
Loss = 1.2165e-01, PNorm = 72.4300, GNorm = 0.6103, lr_0 = 5.2040e-04
Loss = 1.3335e-01, PNorm = 72.4404, GNorm = 0.7203, lr_0 = 5.2005e-04
Loss = 1.6653e-01, PNorm = 72.4508, GNorm = 1.1458, lr_0 = 5.1969e-04
Loss = 1.2568e-01, PNorm = 72.4643, GNorm = 0.6378, lr_0 = 5.1933e-04
Loss = 1.3949e-01, PNorm = 72.4781, GNorm = 0.6374, lr_0 = 5.1898e-04
Loss = 1.5259e-01, PNorm = 72.4964, GNorm = 0.7604, lr_0 = 5.1862e-04
Loss = 1.4180e-01, PNorm = 72.5111, GNorm = 1.1681, lr_0 = 5.1827e-04
Loss = 1.3956e-01, PNorm = 72.5227, GNorm = 1.0267, lr_0 = 5.1791e-04
Validation mae = 0.239939
Epoch 10
Loss = 1.2271e-01, PNorm = 72.5364, GNorm = 0.5976, lr_0 = 5.1756e-04
Loss = 1.3800e-01, PNorm = 72.5506, GNorm = 0.8638, lr_0 = 5.1720e-04
Loss = 1.4221e-01, PNorm = 72.5644, GNorm = 0.8110, lr_0 = 5.1685e-04
Loss = 1.2315e-01, PNorm = 72.5754, GNorm = 0.7143, lr_0 = 5.1649e-04
Loss = 1.4008e-01, PNorm = 72.5908, GNorm = 0.5809, lr_0 = 5.1614e-04
Loss = 1.4860e-01, PNorm = 72.6049, GNorm = 0.7178, lr_0 = 5.1579e-04
Loss = 1.4235e-01, PNorm = 72.6176, GNorm = 0.6129, lr_0 = 5.1543e-04
Loss = 1.2440e-01, PNorm = 72.6350, GNorm = 0.5223, lr_0 = 5.1508e-04
Loss = 1.2608e-01, PNorm = 72.6498, GNorm = 0.5283, lr_0 = 5.1473e-04
Loss = 1.4647e-01, PNorm = 72.6685, GNorm = 0.6306, lr_0 = 5.1437e-04
Loss = 1.2295e-01, PNorm = 72.6833, GNorm = 0.7362, lr_0 = 5.1402e-04
Loss = 1.3309e-01, PNorm = 72.6935, GNorm = 0.8429, lr_0 = 5.1367e-04
Loss = 1.3339e-01, PNorm = 72.7075, GNorm = 0.6981, lr_0 = 5.1332e-04
Loss = 1.2708e-01, PNorm = 72.7213, GNorm = 0.6800, lr_0 = 5.1297e-04
Loss = 1.3627e-01, PNorm = 72.7362, GNorm = 0.7573, lr_0 = 5.1262e-04
Loss = 1.2913e-01, PNorm = 72.7482, GNorm = 0.6747, lr_0 = 5.1226e-04
Loss = 1.4108e-01, PNorm = 72.7601, GNorm = 0.5823, lr_0 = 5.1191e-04
Loss = 1.3364e-01, PNorm = 72.7787, GNorm = 0.6149, lr_0 = 5.1156e-04
Loss = 1.2975e-01, PNorm = 72.7929, GNorm = 0.7354, lr_0 = 5.1121e-04
Loss = 1.3461e-01, PNorm = 72.8057, GNorm = 1.1061, lr_0 = 5.1086e-04
Loss = 1.4367e-01, PNorm = 72.8161, GNorm = 0.7869, lr_0 = 5.1051e-04
Loss = 1.2034e-01, PNorm = 72.8294, GNorm = 0.6751, lr_0 = 5.1016e-04
Loss = 1.2846e-01, PNorm = 72.8457, GNorm = 0.7572, lr_0 = 5.0981e-04
Loss = 1.3691e-01, PNorm = 72.8661, GNorm = 1.1131, lr_0 = 5.0946e-04
Loss = 1.2460e-01, PNorm = 72.8815, GNorm = 0.7395, lr_0 = 5.0911e-04
Loss = 1.2261e-01, PNorm = 72.8966, GNorm = 0.6647, lr_0 = 5.0877e-04
Loss = 1.3826e-01, PNorm = 72.9080, GNorm = 0.7660, lr_0 = 5.0842e-04
Loss = 1.3945e-01, PNorm = 72.9186, GNorm = 0.8033, lr_0 = 5.0807e-04
Loss = 1.4085e-01, PNorm = 72.9351, GNorm = 0.9974, lr_0 = 5.0772e-04
Loss = 1.3137e-01, PNorm = 72.9491, GNorm = 0.6443, lr_0 = 5.0737e-04
Loss = 1.1917e-01, PNorm = 72.9655, GNorm = 0.4782, lr_0 = 5.0703e-04
Loss = 1.0517e-01, PNorm = 72.9807, GNorm = 0.7470, lr_0 = 5.0668e-04
Loss = 1.3808e-01, PNorm = 72.9924, GNorm = 0.9738, lr_0 = 5.0633e-04
Loss = 1.2665e-01, PNorm = 73.0072, GNorm = 0.8071, lr_0 = 5.0598e-04
Loss = 1.5823e-01, PNorm = 73.0196, GNorm = 0.6648, lr_0 = 5.0564e-04
Loss = 1.5683e-01, PNorm = 73.0395, GNorm = 0.9609, lr_0 = 5.0529e-04
Loss = 1.2005e-01, PNorm = 73.0569, GNorm = 0.5417, lr_0 = 5.0494e-04
Loss = 1.4673e-01, PNorm = 73.0751, GNorm = 0.5431, lr_0 = 5.0460e-04
Loss = 1.4259e-01, PNorm = 73.0890, GNorm = 0.9004, lr_0 = 5.0425e-04
Loss = 1.6385e-01, PNorm = 73.1083, GNorm = 1.1476, lr_0 = 5.0391e-04
Loss = 1.4749e-01, PNorm = 73.1319, GNorm = 0.6242, lr_0 = 5.0356e-04
Loss = 1.6135e-01, PNorm = 73.1486, GNorm = 1.1212, lr_0 = 5.0322e-04
Loss = 1.3948e-01, PNorm = 73.1693, GNorm = 0.7306, lr_0 = 5.0287e-04
Loss = 1.2352e-01, PNorm = 73.1885, GNorm = 0.7293, lr_0 = 5.0253e-04
Loss = 1.2816e-01, PNorm = 73.2015, GNorm = 0.6683, lr_0 = 5.0218e-04
Loss = 1.3677e-01, PNorm = 73.2183, GNorm = 0.6779, lr_0 = 5.0184e-04
Loss = 1.3184e-01, PNorm = 73.2331, GNorm = 0.5739, lr_0 = 5.0150e-04
Loss = 1.3101e-01, PNorm = 73.2433, GNorm = 0.6669, lr_0 = 5.0115e-04
Loss = 1.4567e-01, PNorm = 73.2652, GNorm = 0.9898, lr_0 = 5.0081e-04
Loss = 1.3415e-01, PNorm = 73.2811, GNorm = 0.6026, lr_0 = 5.0047e-04
Loss = 1.2737e-01, PNorm = 73.2990, GNorm = 0.7733, lr_0 = 5.0012e-04
Loss = 1.1956e-01, PNorm = 73.3141, GNorm = 0.4952, lr_0 = 4.9978e-04
Loss = 1.3998e-01, PNorm = 73.3257, GNorm = 0.8563, lr_0 = 4.9944e-04
Loss = 1.5328e-01, PNorm = 73.3412, GNorm = 0.4208, lr_0 = 4.9910e-04
Loss = 1.2701e-01, PNorm = 73.3556, GNorm = 1.3241, lr_0 = 4.9875e-04
Loss = 1.4514e-01, PNorm = 73.3736, GNorm = 0.9532, lr_0 = 4.9841e-04
Loss = 1.2751e-01, PNorm = 73.3879, GNorm = 0.7796, lr_0 = 4.9807e-04
Loss = 1.4047e-01, PNorm = 73.3991, GNorm = 0.7040, lr_0 = 4.9773e-04
Loss = 1.3455e-01, PNorm = 73.4205, GNorm = 0.7223, lr_0 = 4.9739e-04
Loss = 1.2117e-01, PNorm = 73.4378, GNorm = 0.7229, lr_0 = 4.9705e-04
Loss = 1.2905e-01, PNorm = 73.4525, GNorm = 0.5901, lr_0 = 4.9671e-04
Loss = 1.2095e-01, PNorm = 73.4625, GNorm = 0.7661, lr_0 = 4.9637e-04
Loss = 1.3646e-01, PNorm = 73.4724, GNorm = 0.9859, lr_0 = 4.9603e-04
Loss = 1.2079e-01, PNorm = 73.4878, GNorm = 0.5817, lr_0 = 4.9569e-04
Loss = 1.2401e-01, PNorm = 73.4962, GNorm = 0.9369, lr_0 = 4.9535e-04
Loss = 1.3990e-01, PNorm = 73.5049, GNorm = 0.7849, lr_0 = 4.9501e-04
Loss = 1.3522e-01, PNorm = 73.5208, GNorm = 0.7926, lr_0 = 4.9467e-04
Loss = 1.3209e-01, PNorm = 73.5366, GNorm = 1.4774, lr_0 = 4.9433e-04
Loss = 1.4106e-01, PNorm = 73.5506, GNorm = 0.5301, lr_0 = 4.9399e-04
Loss = 1.3402e-01, PNorm = 73.5683, GNorm = 0.9113, lr_0 = 4.9365e-04
Loss = 1.6767e-01, PNorm = 73.5850, GNorm = 0.7224, lr_0 = 4.9332e-04
Loss = 1.2611e-01, PNorm = 73.6059, GNorm = 1.2186, lr_0 = 4.9298e-04
Loss = 1.3950e-01, PNorm = 73.6183, GNorm = 0.7348, lr_0 = 4.9264e-04
Loss = 1.2304e-01, PNorm = 73.6369, GNorm = 0.5421, lr_0 = 4.9230e-04
Loss = 1.3272e-01, PNorm = 73.6513, GNorm = 1.3769, lr_0 = 4.9197e-04
Loss = 1.5837e-01, PNorm = 73.6667, GNorm = 0.8195, lr_0 = 4.9163e-04
Loss = 1.3533e-01, PNorm = 73.6822, GNorm = 0.7241, lr_0 = 4.9129e-04
Loss = 1.2532e-01, PNorm = 73.6940, GNorm = 1.1713, lr_0 = 4.9095e-04
Loss = 1.3370e-01, PNorm = 73.7091, GNorm = 1.1662, lr_0 = 4.9062e-04
Loss = 1.5015e-01, PNorm = 73.7283, GNorm = 0.6220, lr_0 = 4.9028e-04
Loss = 1.1919e-01, PNorm = 73.7448, GNorm = 0.6168, lr_0 = 4.8995e-04
Loss = 1.2802e-01, PNorm = 73.7583, GNorm = 0.6398, lr_0 = 4.8961e-04
Loss = 1.4169e-01, PNorm = 73.7709, GNorm = 0.7140, lr_0 = 4.8928e-04
Loss = 1.3067e-01, PNorm = 73.7811, GNorm = 0.7212, lr_0 = 4.8894e-04
Loss = 1.4441e-01, PNorm = 73.7970, GNorm = 0.5925, lr_0 = 4.8861e-04
Loss = 1.4286e-01, PNorm = 73.8120, GNorm = 0.8377, lr_0 = 4.8827e-04
Loss = 1.2179e-01, PNorm = 73.8273, GNorm = 0.9563, lr_0 = 4.8794e-04
Loss = 1.3174e-01, PNorm = 73.8383, GNorm = 0.5806, lr_0 = 4.8760e-04
Loss = 1.5111e-01, PNorm = 73.8543, GNorm = 0.6701, lr_0 = 4.8727e-04
Loss = 1.3544e-01, PNorm = 73.8644, GNorm = 1.1996, lr_0 = 4.8693e-04
Loss = 1.2726e-01, PNorm = 73.8766, GNorm = 0.7106, lr_0 = 4.8660e-04
Loss = 1.4957e-01, PNorm = 73.8934, GNorm = 0.6888, lr_0 = 4.8627e-04
Loss = 1.4832e-01, PNorm = 73.9087, GNorm = 0.5307, lr_0 = 4.8593e-04
Loss = 1.3018e-01, PNorm = 73.9239, GNorm = 1.0922, lr_0 = 4.8560e-04
Loss = 1.3880e-01, PNorm = 73.9360, GNorm = 0.8924, lr_0 = 4.8527e-04
Loss = 1.4186e-01, PNorm = 73.9497, GNorm = 1.0036, lr_0 = 4.8494e-04
Loss = 1.3491e-01, PNorm = 73.9553, GNorm = 0.6200, lr_0 = 4.8460e-04
Loss = 1.3556e-01, PNorm = 73.9675, GNorm = 0.9342, lr_0 = 4.8427e-04
Loss = 1.4699e-01, PNorm = 73.9820, GNorm = 0.9824, lr_0 = 4.8394e-04
Loss = 1.3321e-01, PNorm = 74.0033, GNorm = 0.6580, lr_0 = 4.8361e-04
Loss = 1.2697e-01, PNorm = 74.0166, GNorm = 1.0153, lr_0 = 4.8328e-04
Loss = 1.3776e-01, PNorm = 74.0317, GNorm = 1.2306, lr_0 = 4.8295e-04
Loss = 1.4053e-01, PNorm = 74.0469, GNorm = 0.5641, lr_0 = 4.8262e-04
Loss = 1.3267e-01, PNorm = 74.0596, GNorm = 0.7237, lr_0 = 4.8228e-04
Loss = 1.4507e-01, PNorm = 74.0732, GNorm = 0.8620, lr_0 = 4.8195e-04
Loss = 1.2888e-01, PNorm = 74.0856, GNorm = 0.8257, lr_0 = 4.8162e-04
Loss = 1.1875e-01, PNorm = 74.0992, GNorm = 0.6986, lr_0 = 4.8129e-04
Loss = 1.3326e-01, PNorm = 74.1106, GNorm = 0.5919, lr_0 = 4.8096e-04
Loss = 1.2752e-01, PNorm = 74.1215, GNorm = 1.4384, lr_0 = 4.8064e-04
Loss = 1.1980e-01, PNorm = 74.1328, GNorm = 0.9276, lr_0 = 4.8031e-04
Loss = 1.3925e-01, PNorm = 74.1481, GNorm = 0.7013, lr_0 = 4.7998e-04
Loss = 1.4543e-01, PNorm = 74.1569, GNorm = 0.6523, lr_0 = 4.7965e-04
Loss = 1.1736e-01, PNorm = 74.1677, GNorm = 0.4844, lr_0 = 4.7932e-04
Loss = 1.3212e-01, PNorm = 74.1803, GNorm = 0.6047, lr_0 = 4.7899e-04
Loss = 1.6063e-01, PNorm = 74.1923, GNorm = 1.0252, lr_0 = 4.7866e-04
Loss = 1.4615e-01, PNorm = 74.2092, GNorm = 0.9196, lr_0 = 4.7833e-04
Loss = 1.2225e-01, PNorm = 74.2237, GNorm = 0.7307, lr_0 = 4.7801e-04
Loss = 1.0754e-01, PNorm = 74.2394, GNorm = 0.4890, lr_0 = 4.7768e-04
Loss = 1.5073e-01, PNorm = 74.2542, GNorm = 0.7495, lr_0 = 4.7735e-04
Loss = 1.5871e-01, PNorm = 74.2698, GNorm = 0.7955, lr_0 = 4.7703e-04
Validation mae = 0.236236
Epoch 11
Loss = 1.2486e-01, PNorm = 74.2800, GNorm = 0.7943, lr_0 = 4.7670e-04
Loss = 1.1105e-01, PNorm = 74.2938, GNorm = 0.6337, lr_0 = 4.7637e-04
Loss = 1.3076e-01, PNorm = 74.3113, GNorm = 1.0330, lr_0 = 4.7605e-04
Loss = 1.4308e-01, PNorm = 74.3220, GNorm = 0.5572, lr_0 = 4.7572e-04
Loss = 1.2413e-01, PNorm = 74.3348, GNorm = 0.6886, lr_0 = 4.7539e-04
Loss = 1.3055e-01, PNorm = 74.3466, GNorm = 0.5215, lr_0 = 4.7507e-04
Loss = 1.0692e-01, PNorm = 74.3589, GNorm = 0.8676, lr_0 = 4.7474e-04
Loss = 1.1655e-01, PNorm = 74.3663, GNorm = 0.6224, lr_0 = 4.7442e-04
Loss = 1.3639e-01, PNorm = 74.3789, GNorm = 1.4685, lr_0 = 4.7409e-04
Loss = 1.3761e-01, PNorm = 74.3984, GNorm = 1.0272, lr_0 = 4.7377e-04
Loss = 1.2350e-01, PNorm = 74.4182, GNorm = 0.5299, lr_0 = 4.7344e-04
Loss = 1.2130e-01, PNorm = 74.4329, GNorm = 0.7211, lr_0 = 4.7312e-04
Loss = 1.2091e-01, PNorm = 74.4535, GNorm = 0.5506, lr_0 = 4.7279e-04
Loss = 1.1556e-01, PNorm = 74.4658, GNorm = 1.0538, lr_0 = 4.7247e-04
Loss = 1.2081e-01, PNorm = 74.4833, GNorm = 0.6285, lr_0 = 4.7215e-04
Loss = 1.0680e-01, PNorm = 74.4986, GNorm = 0.5213, lr_0 = 4.7182e-04
Loss = 1.3017e-01, PNorm = 74.5112, GNorm = 1.1579, lr_0 = 4.7150e-04
Loss = 1.2463e-01, PNorm = 74.5270, GNorm = 0.9315, lr_0 = 4.7118e-04
Loss = 1.2758e-01, PNorm = 74.5421, GNorm = 0.9193, lr_0 = 4.7085e-04
Loss = 1.4367e-01, PNorm = 74.5529, GNorm = 0.6795, lr_0 = 4.7053e-04
Loss = 1.1808e-01, PNorm = 74.5649, GNorm = 0.7748, lr_0 = 4.7021e-04
Loss = 1.1669e-01, PNorm = 74.5756, GNorm = 0.5544, lr_0 = 4.6989e-04
Loss = 1.1897e-01, PNorm = 74.5842, GNorm = 0.4655, lr_0 = 4.6957e-04
Loss = 1.2903e-01, PNorm = 74.6028, GNorm = 0.8342, lr_0 = 4.6924e-04
Loss = 1.3699e-01, PNorm = 74.6129, GNorm = 0.7524, lr_0 = 4.6892e-04
Loss = 1.4175e-01, PNorm = 74.6217, GNorm = 0.8820, lr_0 = 4.6860e-04
Loss = 1.2165e-01, PNorm = 74.6301, GNorm = 0.6567, lr_0 = 4.6828e-04
Loss = 1.2572e-01, PNorm = 74.6411, GNorm = 0.5849, lr_0 = 4.6796e-04
Loss = 1.3153e-01, PNorm = 74.6551, GNorm = 0.6685, lr_0 = 4.6764e-04
Loss = 1.1999e-01, PNorm = 74.6694, GNorm = 0.6779, lr_0 = 4.6732e-04
Loss = 1.3249e-01, PNorm = 74.6844, GNorm = 0.7140, lr_0 = 4.6700e-04
Loss = 1.2267e-01, PNorm = 74.6998, GNorm = 0.6821, lr_0 = 4.6668e-04
Loss = 1.1336e-01, PNorm = 74.7160, GNorm = 0.7849, lr_0 = 4.6636e-04
Loss = 1.3631e-01, PNorm = 74.7351, GNorm = 0.5495, lr_0 = 4.6604e-04
Loss = 1.2867e-01, PNorm = 74.7486, GNorm = 1.3733, lr_0 = 4.6572e-04
Loss = 1.3324e-01, PNorm = 74.7652, GNorm = 1.2451, lr_0 = 4.6540e-04
Loss = 1.5007e-01, PNorm = 74.7792, GNorm = 0.9437, lr_0 = 4.6508e-04
Loss = 1.1672e-01, PNorm = 74.7914, GNorm = 0.8124, lr_0 = 4.6476e-04
Loss = 1.3876e-01, PNorm = 74.8021, GNorm = 0.5474, lr_0 = 4.6445e-04
Loss = 1.2485e-01, PNorm = 74.8147, GNorm = 0.4953, lr_0 = 4.6413e-04
Loss = 1.1593e-01, PNorm = 74.8296, GNorm = 0.6014, lr_0 = 4.6381e-04
Loss = 1.1265e-01, PNorm = 74.8435, GNorm = 0.5223, lr_0 = 4.6349e-04
Loss = 1.2159e-01, PNorm = 74.8558, GNorm = 0.7690, lr_0 = 4.6317e-04
Loss = 1.2631e-01, PNorm = 74.8630, GNorm = 0.6544, lr_0 = 4.6286e-04
Loss = 1.4046e-01, PNorm = 74.8735, GNorm = 0.6301, lr_0 = 4.6254e-04
Loss = 1.2392e-01, PNorm = 74.8849, GNorm = 1.2165, lr_0 = 4.6222e-04
Loss = 1.4875e-01, PNorm = 74.9034, GNorm = 1.0405, lr_0 = 4.6191e-04
Loss = 1.2750e-01, PNorm = 74.9210, GNorm = 0.6658, lr_0 = 4.6159e-04
Loss = 1.3826e-01, PNorm = 74.9347, GNorm = 1.0468, lr_0 = 4.6127e-04
Loss = 1.2465e-01, PNorm = 74.9510, GNorm = 0.7155, lr_0 = 4.6096e-04
Loss = 1.1492e-01, PNorm = 74.9662, GNorm = 0.4967, lr_0 = 4.6064e-04
Loss = 1.3294e-01, PNorm = 74.9817, GNorm = 0.9526, lr_0 = 4.6033e-04
Loss = 1.2152e-01, PNorm = 74.9919, GNorm = 0.5573, lr_0 = 4.6001e-04
Loss = 1.1902e-01, PNorm = 75.0038, GNorm = 0.5629, lr_0 = 4.5970e-04
Loss = 1.2313e-01, PNorm = 75.0143, GNorm = 0.6998, lr_0 = 4.5938e-04
Loss = 1.2234e-01, PNorm = 75.0246, GNorm = 0.5480, lr_0 = 4.5907e-04
Loss = 1.3395e-01, PNorm = 75.0400, GNorm = 0.5979, lr_0 = 4.5875e-04
Loss = 1.4298e-01, PNorm = 75.0532, GNorm = 0.9168, lr_0 = 4.5844e-04
Loss = 1.2907e-01, PNorm = 75.0734, GNorm = 0.5736, lr_0 = 4.5812e-04
Loss = 1.3057e-01, PNorm = 75.0868, GNorm = 0.7782, lr_0 = 4.5781e-04
Loss = 1.3419e-01, PNorm = 75.1022, GNorm = 0.6913, lr_0 = 4.5750e-04
Loss = 1.0887e-01, PNorm = 75.1179, GNorm = 0.7681, lr_0 = 4.5718e-04
Loss = 1.3017e-01, PNorm = 75.1288, GNorm = 0.7331, lr_0 = 4.5687e-04
Loss = 1.1972e-01, PNorm = 75.1431, GNorm = 0.8613, lr_0 = 4.5656e-04
Loss = 1.2860e-01, PNorm = 75.1534, GNorm = 0.9421, lr_0 = 4.5624e-04
Loss = 1.3228e-01, PNorm = 75.1639, GNorm = 0.8664, lr_0 = 4.5593e-04
Loss = 1.3220e-01, PNorm = 75.1761, GNorm = 0.6019, lr_0 = 4.5562e-04
Loss = 1.3611e-01, PNorm = 75.1939, GNorm = 0.6465, lr_0 = 4.5531e-04
Loss = 1.6545e-01, PNorm = 75.2136, GNorm = 0.6688, lr_0 = 4.5499e-04
Loss = 1.2392e-01, PNorm = 75.2310, GNorm = 0.9216, lr_0 = 4.5468e-04
Loss = 1.3141e-01, PNorm = 75.2413, GNorm = 0.5637, lr_0 = 4.5437e-04
Loss = 1.2907e-01, PNorm = 75.2510, GNorm = 0.7672, lr_0 = 4.5406e-04
Loss = 1.2793e-01, PNorm = 75.2561, GNorm = 0.7344, lr_0 = 4.5375e-04
Loss = 1.3916e-01, PNorm = 75.2656, GNorm = 1.0072, lr_0 = 4.5344e-04
Loss = 1.2107e-01, PNorm = 75.2764, GNorm = 0.7747, lr_0 = 4.5313e-04
Loss = 1.3185e-01, PNorm = 75.2915, GNorm = 0.7777, lr_0 = 4.5282e-04
Loss = 1.4487e-01, PNorm = 75.3005, GNorm = 0.8383, lr_0 = 4.5251e-04
Loss = 1.4187e-01, PNorm = 75.3150, GNorm = 0.5638, lr_0 = 4.5220e-04
Loss = 1.1708e-01, PNorm = 75.3257, GNorm = 0.7229, lr_0 = 4.5189e-04
Loss = 1.0928e-01, PNorm = 75.3382, GNorm = 0.7967, lr_0 = 4.5158e-04
Loss = 1.4273e-01, PNorm = 75.3568, GNorm = 0.9359, lr_0 = 4.5127e-04
Loss = 1.0925e-01, PNorm = 75.3711, GNorm = 0.4750, lr_0 = 4.5096e-04
Loss = 1.1149e-01, PNorm = 75.3825, GNorm = 0.4056, lr_0 = 4.5065e-04
Loss = 1.2525e-01, PNorm = 75.3908, GNorm = 0.8506, lr_0 = 4.5034e-04
Loss = 1.3043e-01, PNorm = 75.3986, GNorm = 0.6145, lr_0 = 4.5003e-04
Loss = 1.3007e-01, PNorm = 75.4140, GNorm = 1.6938, lr_0 = 4.4972e-04
Loss = 1.4505e-01, PNorm = 75.4380, GNorm = 0.8073, lr_0 = 4.4942e-04
Loss = 1.2669e-01, PNorm = 75.4506, GNorm = 1.0408, lr_0 = 4.4911e-04
Loss = 1.3358e-01, PNorm = 75.4667, GNorm = 0.5875, lr_0 = 4.4880e-04
Loss = 1.3726e-01, PNorm = 75.4787, GNorm = 0.7151, lr_0 = 4.4849e-04
Loss = 1.1858e-01, PNorm = 75.4903, GNorm = 0.8164, lr_0 = 4.4819e-04
Loss = 1.2276e-01, PNorm = 75.5094, GNorm = 1.0036, lr_0 = 4.4788e-04
Loss = 1.4178e-01, PNorm = 75.5225, GNorm = 0.7375, lr_0 = 4.4757e-04
Loss = 1.3341e-01, PNorm = 75.5306, GNorm = 0.5625, lr_0 = 4.4727e-04
Loss = 1.2905e-01, PNorm = 75.5411, GNorm = 0.7965, lr_0 = 4.4696e-04
Loss = 1.3410e-01, PNorm = 75.5509, GNorm = 0.6559, lr_0 = 4.4665e-04
Loss = 1.3078e-01, PNorm = 75.5614, GNorm = 0.5412, lr_0 = 4.4635e-04
Loss = 1.3383e-01, PNorm = 75.5747, GNorm = 0.7015, lr_0 = 4.4604e-04
Loss = 1.2360e-01, PNorm = 75.5880, GNorm = 0.7250, lr_0 = 4.4574e-04
Loss = 1.2892e-01, PNorm = 75.6048, GNorm = 0.9051, lr_0 = 4.4543e-04
Loss = 1.2303e-01, PNorm = 75.6166, GNorm = 0.6349, lr_0 = 4.4513e-04
Loss = 1.1597e-01, PNorm = 75.6340, GNorm = 0.5305, lr_0 = 4.4482e-04
Loss = 1.2924e-01, PNorm = 75.6397, GNorm = 0.7615, lr_0 = 4.4452e-04
Loss = 1.2137e-01, PNorm = 75.6473, GNorm = 0.6028, lr_0 = 4.4421e-04
Loss = 1.3168e-01, PNorm = 75.6575, GNorm = 0.8593, lr_0 = 4.4391e-04
Loss = 1.1710e-01, PNorm = 75.6720, GNorm = 0.6337, lr_0 = 4.4360e-04
Loss = 1.2893e-01, PNorm = 75.6832, GNorm = 0.6787, lr_0 = 4.4330e-04
Loss = 1.2388e-01, PNorm = 75.6980, GNorm = 0.5832, lr_0 = 4.4299e-04
Loss = 1.4048e-01, PNorm = 75.7135, GNorm = 0.6724, lr_0 = 4.4269e-04
Loss = 1.2733e-01, PNorm = 75.7251, GNorm = 0.6912, lr_0 = 4.4239e-04
Loss = 1.3883e-01, PNorm = 75.7382, GNorm = 0.5823, lr_0 = 4.4209e-04
Loss = 1.3910e-01, PNorm = 75.7492, GNorm = 1.1936, lr_0 = 4.4178e-04
Loss = 1.3199e-01, PNorm = 75.7603, GNorm = 0.7264, lr_0 = 4.4148e-04
Loss = 1.1597e-01, PNorm = 75.7652, GNorm = 0.5931, lr_0 = 4.4118e-04
Loss = 1.4972e-01, PNorm = 75.7741, GNorm = 0.7575, lr_0 = 4.4088e-04
Loss = 1.5498e-01, PNorm = 75.7874, GNorm = 0.7480, lr_0 = 4.4057e-04
Loss = 1.4247e-01, PNorm = 75.8042, GNorm = 0.6647, lr_0 = 4.4027e-04
Loss = 1.1908e-01, PNorm = 75.8166, GNorm = 0.6442, lr_0 = 4.3997e-04
Loss = 1.1604e-01, PNorm = 75.8331, GNorm = 0.6934, lr_0 = 4.3967e-04
Loss = 1.0925e-01, PNorm = 75.8469, GNorm = 0.4899, lr_0 = 4.3937e-04
Validation mae = 0.233133
Epoch 12
Loss = 1.1495e-01, PNorm = 75.8605, GNorm = 0.6244, lr_0 = 4.3907e-04
Loss = 1.3165e-01, PNorm = 75.8672, GNorm = 0.6494, lr_0 = 4.3877e-04
Loss = 1.0583e-01, PNorm = 75.8840, GNorm = 0.6271, lr_0 = 4.3846e-04
Loss = 1.1282e-01, PNorm = 75.8943, GNorm = 0.6930, lr_0 = 4.3816e-04
Loss = 1.1985e-01, PNorm = 75.9083, GNorm = 0.8203, lr_0 = 4.3786e-04
Loss = 1.4179e-01, PNorm = 75.9140, GNorm = 0.9503, lr_0 = 4.3756e-04
Loss = 1.1616e-01, PNorm = 75.9256, GNorm = 0.6625, lr_0 = 4.3726e-04
Loss = 1.3202e-01, PNorm = 75.9388, GNorm = 1.1652, lr_0 = 4.3696e-04
Loss = 1.2274e-01, PNorm = 75.9503, GNorm = 1.0215, lr_0 = 4.3667e-04
Loss = 1.0761e-01, PNorm = 75.9698, GNorm = 0.7998, lr_0 = 4.3637e-04
Loss = 1.2074e-01, PNorm = 75.9839, GNorm = 0.8235, lr_0 = 4.3607e-04
Loss = 1.1630e-01, PNorm = 75.9958, GNorm = 0.6656, lr_0 = 4.3577e-04
Loss = 1.2024e-01, PNorm = 76.0079, GNorm = 0.6177, lr_0 = 4.3547e-04
Loss = 1.2355e-01, PNorm = 76.0152, GNorm = 0.6465, lr_0 = 4.3517e-04
Loss = 1.2140e-01, PNorm = 76.0264, GNorm = 1.2847, lr_0 = 4.3487e-04
Loss = 1.2634e-01, PNorm = 76.0389, GNorm = 0.8535, lr_0 = 4.3458e-04
Loss = 1.1263e-01, PNorm = 76.0541, GNorm = 0.8260, lr_0 = 4.3428e-04
Loss = 1.2338e-01, PNorm = 76.0664, GNorm = 0.8872, lr_0 = 4.3398e-04
Loss = 1.1690e-01, PNorm = 76.0781, GNorm = 0.6211, lr_0 = 4.3368e-04
Loss = 1.1354e-01, PNorm = 76.0890, GNorm = 0.6883, lr_0 = 4.3339e-04
Loss = 1.2084e-01, PNorm = 76.0944, GNorm = 0.8875, lr_0 = 4.3309e-04
Loss = 1.1988e-01, PNorm = 76.1048, GNorm = 0.6559, lr_0 = 4.3279e-04
Loss = 1.2536e-01, PNorm = 76.1120, GNorm = 0.9532, lr_0 = 4.3250e-04
Loss = 1.2364e-01, PNorm = 76.1249, GNorm = 0.5006, lr_0 = 4.3220e-04
Loss = 1.2113e-01, PNorm = 76.1325, GNorm = 0.5827, lr_0 = 4.3190e-04
Loss = 1.2330e-01, PNorm = 76.1458, GNorm = 0.5896, lr_0 = 4.3161e-04
Loss = 1.3269e-01, PNorm = 76.1515, GNorm = 1.2613, lr_0 = 4.3131e-04
Loss = 1.1036e-01, PNorm = 76.1627, GNorm = 0.9373, lr_0 = 4.3102e-04
Loss = 1.1528e-01, PNorm = 76.1764, GNorm = 0.9653, lr_0 = 4.3072e-04
Loss = 1.3860e-01, PNorm = 76.1869, GNorm = 0.5955, lr_0 = 4.3043e-04
Loss = 1.1664e-01, PNorm = 76.1980, GNorm = 0.6580, lr_0 = 4.3013e-04
Loss = 1.1398e-01, PNorm = 76.2031, GNorm = 0.5570, lr_0 = 4.2984e-04
Loss = 1.1539e-01, PNorm = 76.2126, GNorm = 0.7379, lr_0 = 4.2954e-04
Loss = 1.0533e-01, PNorm = 76.2203, GNorm = 0.5994, lr_0 = 4.2925e-04
Loss = 1.3300e-01, PNorm = 76.2319, GNorm = 0.5588, lr_0 = 4.2895e-04
Loss = 1.1514e-01, PNorm = 76.2475, GNorm = 0.6722, lr_0 = 4.2866e-04
Loss = 1.0482e-01, PNorm = 76.2572, GNorm = 0.6887, lr_0 = 4.2837e-04
Loss = 1.1831e-01, PNorm = 76.2674, GNorm = 0.7969, lr_0 = 4.2807e-04
Loss = 1.0378e-01, PNorm = 76.2817, GNorm = 0.6995, lr_0 = 4.2778e-04
Loss = 1.1337e-01, PNorm = 76.2960, GNorm = 0.5105, lr_0 = 4.2749e-04
Loss = 1.1651e-01, PNorm = 76.3086, GNorm = 0.6696, lr_0 = 4.2719e-04
Loss = 1.2772e-01, PNorm = 76.3186, GNorm = 0.7871, lr_0 = 4.2690e-04
Loss = 1.2384e-01, PNorm = 76.3286, GNorm = 0.5851, lr_0 = 4.2661e-04
Loss = 1.3647e-01, PNorm = 76.3453, GNorm = 0.6731, lr_0 = 4.2632e-04
Loss = 1.3012e-01, PNorm = 76.3574, GNorm = 1.1065, lr_0 = 4.2602e-04
Loss = 1.1791e-01, PNorm = 76.3715, GNorm = 0.6784, lr_0 = 4.2573e-04
Loss = 1.2332e-01, PNorm = 76.3844, GNorm = 0.4520, lr_0 = 4.2544e-04
Loss = 1.0186e-01, PNorm = 76.3947, GNorm = 0.9330, lr_0 = 4.2515e-04
Loss = 1.1454e-01, PNorm = 76.4055, GNorm = 0.6063, lr_0 = 4.2486e-04
Loss = 1.3254e-01, PNorm = 76.4172, GNorm = 1.0095, lr_0 = 4.2457e-04
Loss = 1.1232e-01, PNorm = 76.4354, GNorm = 0.7305, lr_0 = 4.2428e-04
Loss = 1.2723e-01, PNorm = 76.4460, GNorm = 0.6420, lr_0 = 4.2399e-04
Loss = 1.2229e-01, PNorm = 76.4542, GNorm = 0.6208, lr_0 = 4.2370e-04
Loss = 1.3599e-01, PNorm = 76.4664, GNorm = 1.2024, lr_0 = 4.2340e-04
Loss = 1.2775e-01, PNorm = 76.4794, GNorm = 0.7811, lr_0 = 4.2311e-04
Loss = 1.2699e-01, PNorm = 76.4870, GNorm = 0.9100, lr_0 = 4.2283e-04
Loss = 1.3204e-01, PNorm = 76.4957, GNorm = 0.7289, lr_0 = 4.2254e-04
Loss = 1.1272e-01, PNorm = 76.5061, GNorm = 0.9976, lr_0 = 4.2225e-04
Loss = 1.2069e-01, PNorm = 76.5172, GNorm = 0.6302, lr_0 = 4.2196e-04
Loss = 1.2374e-01, PNorm = 76.5280, GNorm = 0.8395, lr_0 = 4.2167e-04
Loss = 1.1858e-01, PNorm = 76.5385, GNorm = 0.5472, lr_0 = 4.2138e-04
Loss = 1.3139e-01, PNorm = 76.5485, GNorm = 0.7531, lr_0 = 4.2109e-04
Loss = 1.1994e-01, PNorm = 76.5623, GNorm = 0.7491, lr_0 = 4.2080e-04
Loss = 1.2076e-01, PNorm = 76.5730, GNorm = 0.9466, lr_0 = 4.2051e-04
Loss = 1.0721e-01, PNorm = 76.5873, GNorm = 0.5265, lr_0 = 4.2023e-04
Loss = 1.3673e-01, PNorm = 76.5996, GNorm = 1.0001, lr_0 = 4.1994e-04
Loss = 1.1843e-01, PNorm = 76.6149, GNorm = 0.6744, lr_0 = 4.1965e-04
Loss = 1.2798e-01, PNorm = 76.6292, GNorm = 0.7911, lr_0 = 4.1936e-04
Loss = 1.3443e-01, PNorm = 76.6394, GNorm = 0.6745, lr_0 = 4.1907e-04
Loss = 1.2891e-01, PNorm = 76.6500, GNorm = 0.6362, lr_0 = 4.1879e-04
Loss = 1.3246e-01, PNorm = 76.6605, GNorm = 0.4997, lr_0 = 4.1850e-04
Loss = 1.2800e-01, PNorm = 76.6724, GNorm = 1.1882, lr_0 = 4.1821e-04
Loss = 1.3015e-01, PNorm = 76.6839, GNorm = 0.6970, lr_0 = 4.1793e-04
Loss = 1.4365e-01, PNorm = 76.6990, GNorm = 0.7409, lr_0 = 4.1764e-04
Loss = 1.3492e-01, PNorm = 76.7094, GNorm = 0.9416, lr_0 = 4.1736e-04
Loss = 1.2135e-01, PNorm = 76.7214, GNorm = 0.5715, lr_0 = 4.1707e-04
Loss = 1.2983e-01, PNorm = 76.7315, GNorm = 0.6970, lr_0 = 4.1678e-04
Loss = 1.3129e-01, PNorm = 76.7430, GNorm = 0.7308, lr_0 = 4.1650e-04
Loss = 1.1830e-01, PNorm = 76.7581, GNorm = 0.9858, lr_0 = 4.1621e-04
Loss = 1.1900e-01, PNorm = 76.7684, GNorm = 0.6760, lr_0 = 4.1593e-04
Loss = 1.2672e-01, PNorm = 76.7774, GNorm = 1.1209, lr_0 = 4.1564e-04
Loss = 1.1991e-01, PNorm = 76.7821, GNorm = 0.9735, lr_0 = 4.1536e-04
Loss = 1.1552e-01, PNorm = 76.7920, GNorm = 1.1554, lr_0 = 4.1507e-04
Loss = 1.2913e-01, PNorm = 76.8045, GNorm = 0.6502, lr_0 = 4.1479e-04
Loss = 1.1073e-01, PNorm = 76.8186, GNorm = 0.5771, lr_0 = 4.1450e-04
Loss = 1.3576e-01, PNorm = 76.8333, GNorm = 0.6037, lr_0 = 4.1422e-04
Loss = 1.2605e-01, PNorm = 76.8467, GNorm = 0.8091, lr_0 = 4.1394e-04
Loss = 1.2852e-01, PNorm = 76.8522, GNorm = 0.7374, lr_0 = 4.1365e-04
Loss = 1.1278e-01, PNorm = 76.8619, GNorm = 0.5271, lr_0 = 4.1337e-04
Loss = 1.2418e-01, PNorm = 76.8709, GNorm = 0.5076, lr_0 = 4.1309e-04
Loss = 1.0697e-01, PNorm = 76.8814, GNorm = 0.8373, lr_0 = 4.1280e-04
Loss = 1.3525e-01, PNorm = 76.8908, GNorm = 0.6219, lr_0 = 4.1252e-04
Loss = 1.1992e-01, PNorm = 76.9046, GNorm = 0.6658, lr_0 = 4.1224e-04
Loss = 1.0587e-01, PNorm = 76.9119, GNorm = 0.9520, lr_0 = 4.1196e-04
Loss = 1.2985e-01, PNorm = 76.9207, GNorm = 0.7535, lr_0 = 4.1167e-04
Loss = 1.2063e-01, PNorm = 76.9274, GNorm = 0.5211, lr_0 = 4.1139e-04
Loss = 1.1928e-01, PNorm = 76.9359, GNorm = 0.6032, lr_0 = 4.1111e-04
Loss = 1.1396e-01, PNorm = 76.9430, GNorm = 0.5437, lr_0 = 4.1083e-04
Loss = 1.4596e-01, PNorm = 76.9555, GNorm = 0.5749, lr_0 = 4.1055e-04
Loss = 1.3252e-01, PNorm = 76.9676, GNorm = 1.4566, lr_0 = 4.1027e-04
Loss = 1.2600e-01, PNorm = 76.9797, GNorm = 0.6495, lr_0 = 4.0998e-04
Loss = 1.2177e-01, PNorm = 76.9937, GNorm = 0.5354, lr_0 = 4.0970e-04
Loss = 1.3680e-01, PNorm = 77.0022, GNorm = 0.5306, lr_0 = 4.0942e-04
Loss = 1.0697e-01, PNorm = 77.0113, GNorm = 0.7099, lr_0 = 4.0914e-04
Loss = 1.2877e-01, PNorm = 77.0182, GNorm = 0.5210, lr_0 = 4.0886e-04
Loss = 1.2695e-01, PNorm = 77.0345, GNorm = 0.9119, lr_0 = 4.0858e-04
Loss = 1.1084e-01, PNorm = 77.0431, GNorm = 0.7056, lr_0 = 4.0830e-04
Loss = 1.2190e-01, PNorm = 77.0536, GNorm = 0.5933, lr_0 = 4.0802e-04
Loss = 1.2340e-01, PNorm = 77.0603, GNorm = 0.5225, lr_0 = 4.0774e-04
Loss = 1.4418e-01, PNorm = 77.0697, GNorm = 0.7021, lr_0 = 4.0746e-04
Loss = 1.1683e-01, PNorm = 77.0829, GNorm = 1.0386, lr_0 = 4.0718e-04
Loss = 1.1786e-01, PNorm = 77.0942, GNorm = 0.5414, lr_0 = 4.0691e-04
Loss = 1.0789e-01, PNorm = 77.1036, GNorm = 0.7427, lr_0 = 4.0663e-04
Loss = 1.2505e-01, PNorm = 77.1101, GNorm = 0.8325, lr_0 = 4.0635e-04
Loss = 1.3630e-01, PNorm = 77.1214, GNorm = 0.7957, lr_0 = 4.0607e-04
Loss = 1.3367e-01, PNorm = 77.1297, GNorm = 0.6199, lr_0 = 4.0579e-04
Loss = 1.1731e-01, PNorm = 77.1384, GNorm = 0.9462, lr_0 = 4.0551e-04
Loss = 1.3814e-01, PNorm = 77.1463, GNorm = 1.0570, lr_0 = 4.0524e-04
Loss = 1.2493e-01, PNorm = 77.1565, GNorm = 0.6931, lr_0 = 4.0496e-04
Loss = 1.2867e-01, PNorm = 77.1593, GNorm = 0.7061, lr_0 = 4.0468e-04
Validation mae = 0.233728
Epoch 13
Loss = 1.1068e-01, PNorm = 77.1672, GNorm = 0.4669, lr_0 = 4.0440e-04
Loss = 1.2483e-01, PNorm = 77.1771, GNorm = 0.8433, lr_0 = 4.0413e-04
Loss = 1.0268e-01, PNorm = 77.1881, GNorm = 0.7319, lr_0 = 4.0385e-04
Loss = 1.0720e-01, PNorm = 77.1987, GNorm = 0.5493, lr_0 = 4.0357e-04
Loss = 1.2738e-01, PNorm = 77.2054, GNorm = 0.7798, lr_0 = 4.0330e-04
Loss = 1.0838e-01, PNorm = 77.2180, GNorm = 1.0238, lr_0 = 4.0302e-04
Loss = 1.2800e-01, PNorm = 77.2289, GNorm = 0.4410, lr_0 = 4.0274e-04
Loss = 1.0927e-01, PNorm = 77.2422, GNorm = 0.8355, lr_0 = 4.0247e-04
Loss = 1.1054e-01, PNorm = 77.2530, GNorm = 0.5436, lr_0 = 4.0219e-04
Loss = 1.0190e-01, PNorm = 77.2660, GNorm = 0.9546, lr_0 = 4.0192e-04
Loss = 1.0536e-01, PNorm = 77.2788, GNorm = 0.4889, lr_0 = 4.0164e-04
Loss = 1.1526e-01, PNorm = 77.2893, GNorm = 0.6076, lr_0 = 4.0137e-04
Loss = 1.1122e-01, PNorm = 77.2968, GNorm = 0.5999, lr_0 = 4.0109e-04
Loss = 1.2509e-01, PNorm = 77.3067, GNorm = 0.5255, lr_0 = 4.0082e-04
Loss = 1.0210e-01, PNorm = 77.3229, GNorm = 0.8368, lr_0 = 4.0054e-04
Loss = 1.0193e-01, PNorm = 77.3324, GNorm = 0.5685, lr_0 = 4.0027e-04
Loss = 1.1774e-01, PNorm = 77.3423, GNorm = 0.7694, lr_0 = 3.9999e-04
Loss = 1.1117e-01, PNorm = 77.3513, GNorm = 0.6064, lr_0 = 3.9972e-04
Loss = 1.1977e-01, PNorm = 77.3626, GNorm = 1.0159, lr_0 = 3.9945e-04
Loss = 1.2519e-01, PNorm = 77.3738, GNorm = 0.7518, lr_0 = 3.9917e-04
Loss = 1.1303e-01, PNorm = 77.3882, GNorm = 0.5074, lr_0 = 3.9890e-04
Loss = 1.2646e-01, PNorm = 77.4042, GNorm = 0.7316, lr_0 = 3.9863e-04
Loss = 1.2056e-01, PNorm = 77.4114, GNorm = 0.6533, lr_0 = 3.9835e-04
Loss = 1.2774e-01, PNorm = 77.4259, GNorm = 0.5912, lr_0 = 3.9808e-04
Loss = 1.1762e-01, PNorm = 77.4320, GNorm = 0.5069, lr_0 = 3.9781e-04
Loss = 1.1940e-01, PNorm = 77.4446, GNorm = 0.5757, lr_0 = 3.9753e-04
Loss = 1.0281e-01, PNorm = 77.4537, GNorm = 0.8018, lr_0 = 3.9726e-04
Loss = 1.1268e-01, PNorm = 77.4612, GNorm = 0.6438, lr_0 = 3.9699e-04
Loss = 1.2375e-01, PNorm = 77.4725, GNorm = 1.1701, lr_0 = 3.9672e-04
Loss = 1.0294e-01, PNorm = 77.4874, GNorm = 0.5096, lr_0 = 3.9645e-04
Loss = 1.1209e-01, PNorm = 77.4947, GNorm = 0.7991, lr_0 = 3.9617e-04
Loss = 1.2632e-01, PNorm = 77.4996, GNorm = 0.7604, lr_0 = 3.9590e-04
Loss = 1.1803e-01, PNorm = 77.5071, GNorm = 0.6797, lr_0 = 3.9563e-04
Loss = 1.0686e-01, PNorm = 77.5179, GNorm = 0.8579, lr_0 = 3.9536e-04
Loss = 1.2971e-01, PNorm = 77.5322, GNorm = 0.5076, lr_0 = 3.9509e-04
Loss = 1.1566e-01, PNorm = 77.5478, GNorm = 0.7952, lr_0 = 3.9482e-04
Loss = 1.0665e-01, PNorm = 77.5603, GNorm = 0.5496, lr_0 = 3.9455e-04
Loss = 1.0647e-01, PNorm = 77.5706, GNorm = 0.7122, lr_0 = 3.9428e-04
Loss = 1.0614e-01, PNorm = 77.5778, GNorm = 0.5137, lr_0 = 3.9401e-04
Loss = 1.0763e-01, PNorm = 77.5838, GNorm = 0.7259, lr_0 = 3.9374e-04
Loss = 1.3725e-01, PNorm = 77.5938, GNorm = 0.5923, lr_0 = 3.9347e-04
Loss = 1.2340e-01, PNorm = 77.6040, GNorm = 0.8365, lr_0 = 3.9320e-04
Loss = 1.1556e-01, PNorm = 77.6147, GNorm = 0.7684, lr_0 = 3.9293e-04
Loss = 1.1222e-01, PNorm = 77.6269, GNorm = 0.6663, lr_0 = 3.9266e-04
Loss = 1.1833e-01, PNorm = 77.6299, GNorm = 0.7098, lr_0 = 3.9239e-04
Loss = 1.1858e-01, PNorm = 77.6325, GNorm = 0.7500, lr_0 = 3.9212e-04
Loss = 1.0410e-01, PNorm = 77.6412, GNorm = 0.7872, lr_0 = 3.9185e-04
Loss = 1.1801e-01, PNorm = 77.6516, GNorm = 0.6790, lr_0 = 3.9159e-04
Loss = 1.2758e-01, PNorm = 77.6604, GNorm = 0.8218, lr_0 = 3.9132e-04
Loss = 1.2423e-01, PNorm = 77.6725, GNorm = 0.6972, lr_0 = 3.9105e-04
Loss = 1.0853e-01, PNorm = 77.6837, GNorm = 0.7637, lr_0 = 3.9078e-04
Loss = 1.1841e-01, PNorm = 77.6986, GNorm = 0.5760, lr_0 = 3.9051e-04
Loss = 1.1721e-01, PNorm = 77.7103, GNorm = 0.5681, lr_0 = 3.9025e-04
Loss = 1.1603e-01, PNorm = 77.7213, GNorm = 0.7098, lr_0 = 3.8998e-04
Loss = 1.4549e-01, PNorm = 77.7247, GNorm = 0.8836, lr_0 = 3.8971e-04
Loss = 1.2812e-01, PNorm = 77.7353, GNorm = 0.7086, lr_0 = 3.8945e-04
Loss = 1.1303e-01, PNorm = 77.7435, GNorm = 0.8114, lr_0 = 3.8918e-04
Loss = 1.1008e-01, PNorm = 77.7536, GNorm = 0.7265, lr_0 = 3.8891e-04
Loss = 1.1830e-01, PNorm = 77.7595, GNorm = 0.8709, lr_0 = 3.8865e-04
Loss = 1.2158e-01, PNorm = 77.7661, GNorm = 1.0404, lr_0 = 3.8838e-04
Loss = 1.1178e-01, PNorm = 77.7740, GNorm = 0.6045, lr_0 = 3.8811e-04
Loss = 1.1315e-01, PNorm = 77.7815, GNorm = 0.8674, lr_0 = 3.8785e-04
Loss = 1.1445e-01, PNorm = 77.7892, GNorm = 0.5200, lr_0 = 3.8758e-04
Loss = 1.1014e-01, PNorm = 77.7950, GNorm = 0.7305, lr_0 = 3.8732e-04
Loss = 1.3924e-01, PNorm = 77.8062, GNorm = 0.5276, lr_0 = 3.8705e-04
Loss = 1.2008e-01, PNorm = 77.8200, GNorm = 1.7548, lr_0 = 3.8679e-04
Loss = 1.3172e-01, PNorm = 77.8354, GNorm = 0.7836, lr_0 = 3.8652e-04
Loss = 1.1280e-01, PNorm = 77.8493, GNorm = 1.0419, lr_0 = 3.8626e-04
Loss = 1.1572e-01, PNorm = 77.8582, GNorm = 0.5843, lr_0 = 3.8599e-04
Loss = 1.3024e-01, PNorm = 77.8707, GNorm = 0.7305, lr_0 = 3.8573e-04
Loss = 1.1476e-01, PNorm = 77.8873, GNorm = 0.9436, lr_0 = 3.8546e-04
Loss = 1.1186e-01, PNorm = 77.8962, GNorm = 0.5467, lr_0 = 3.8520e-04
Loss = 1.1278e-01, PNorm = 77.9062, GNorm = 0.7113, lr_0 = 3.8493e-04
Loss = 1.1615e-01, PNorm = 77.9192, GNorm = 1.0322, lr_0 = 3.8467e-04
Loss = 1.0814e-01, PNorm = 77.9260, GNorm = 0.4690, lr_0 = 3.8441e-04
Loss = 1.3569e-01, PNorm = 77.9342, GNorm = 0.6246, lr_0 = 3.8414e-04
Loss = 1.1886e-01, PNorm = 77.9473, GNorm = 1.1526, lr_0 = 3.8388e-04
Loss = 1.3402e-01, PNorm = 77.9538, GNorm = 0.7611, lr_0 = 3.8362e-04
Loss = 1.2390e-01, PNorm = 77.9645, GNorm = 0.6517, lr_0 = 3.8336e-04
Loss = 1.3197e-01, PNorm = 77.9699, GNorm = 0.7612, lr_0 = 3.8309e-04
Loss = 9.8386e-02, PNorm = 77.9742, GNorm = 0.6424, lr_0 = 3.8283e-04
Loss = 1.2254e-01, PNorm = 77.9835, GNorm = 0.8213, lr_0 = 3.8257e-04
Loss = 1.3349e-01, PNorm = 77.9956, GNorm = 0.7644, lr_0 = 3.8231e-04
Loss = 1.2303e-01, PNorm = 78.0090, GNorm = 0.8176, lr_0 = 3.8204e-04
Loss = 1.0830e-01, PNorm = 78.0184, GNorm = 0.8624, lr_0 = 3.8178e-04
Loss = 1.0992e-01, PNorm = 78.0249, GNorm = 0.4705, lr_0 = 3.8152e-04
Loss = 1.1613e-01, PNorm = 78.0372, GNorm = 0.4383, lr_0 = 3.8126e-04
Loss = 1.2783e-01, PNorm = 78.0482, GNorm = 0.7556, lr_0 = 3.8100e-04
Loss = 1.2271e-01, PNorm = 78.0592, GNorm = 0.8381, lr_0 = 3.8074e-04
Loss = 1.1767e-01, PNorm = 78.0742, GNorm = 1.4261, lr_0 = 3.8048e-04
Loss = 1.2452e-01, PNorm = 78.0778, GNorm = 0.7697, lr_0 = 3.8022e-04
Loss = 1.1197e-01, PNorm = 78.0878, GNorm = 0.5595, lr_0 = 3.7995e-04
Loss = 1.1172e-01, PNorm = 78.0969, GNorm = 0.8385, lr_0 = 3.7969e-04
Loss = 1.2529e-01, PNorm = 78.1051, GNorm = 0.8986, lr_0 = 3.7943e-04
Loss = 1.0634e-01, PNorm = 78.1151, GNorm = 1.0566, lr_0 = 3.7917e-04
Loss = 1.2015e-01, PNorm = 78.1237, GNorm = 0.6028, lr_0 = 3.7891e-04
Loss = 1.3120e-01, PNorm = 78.1344, GNorm = 0.9645, lr_0 = 3.7866e-04
Loss = 1.1158e-01, PNorm = 78.1434, GNorm = 0.5838, lr_0 = 3.7840e-04
Loss = 1.0838e-01, PNorm = 78.1494, GNorm = 0.6189, lr_0 = 3.7814e-04
Loss = 1.2802e-01, PNorm = 78.1584, GNorm = 0.6160, lr_0 = 3.7788e-04
Loss = 9.9628e-02, PNorm = 78.1653, GNorm = 0.6792, lr_0 = 3.7762e-04
Loss = 1.1438e-01, PNorm = 78.1772, GNorm = 0.5646, lr_0 = 3.7736e-04
Loss = 1.0523e-01, PNorm = 78.1888, GNorm = 0.6400, lr_0 = 3.7710e-04
Loss = 1.0575e-01, PNorm = 78.1939, GNorm = 0.4436, lr_0 = 3.7684e-04
Loss = 1.1438e-01, PNorm = 78.1982, GNorm = 1.0198, lr_0 = 3.7659e-04
Loss = 1.0975e-01, PNorm = 78.2082, GNorm = 0.7495, lr_0 = 3.7633e-04
Loss = 1.2657e-01, PNorm = 78.2160, GNorm = 0.8084, lr_0 = 3.7607e-04
Loss = 1.2666e-01, PNorm = 78.2268, GNorm = 0.8008, lr_0 = 3.7581e-04
Loss = 1.0630e-01, PNorm = 78.2363, GNorm = 0.6438, lr_0 = 3.7555e-04
Loss = 1.2072e-01, PNorm = 78.2448, GNorm = 0.7150, lr_0 = 3.7530e-04
Loss = 1.2248e-01, PNorm = 78.2523, GNorm = 0.6570, lr_0 = 3.7504e-04
Loss = 1.0463e-01, PNorm = 78.2600, GNorm = 0.9863, lr_0 = 3.7478e-04
Loss = 1.1916e-01, PNorm = 78.2630, GNorm = 0.7050, lr_0 = 3.7453e-04
Loss = 1.0909e-01, PNorm = 78.2676, GNorm = 0.6994, lr_0 = 3.7427e-04
Loss = 1.2831e-01, PNorm = 78.2693, GNorm = 0.6685, lr_0 = 3.7401e-04
Loss = 1.2535e-01, PNorm = 78.2783, GNorm = 0.9985, lr_0 = 3.7376e-04
Loss = 1.2336e-01, PNorm = 78.2871, GNorm = 0.6350, lr_0 = 3.7350e-04
Loss = 1.1687e-01, PNorm = 78.2951, GNorm = 0.6496, lr_0 = 3.7325e-04
Loss = 1.2095e-01, PNorm = 78.3063, GNorm = 0.7227, lr_0 = 3.7299e-04
Loss = 1.1067e-01, PNorm = 78.3123, GNorm = 0.7143, lr_0 = 3.7273e-04
Validation mae = 0.233131
Epoch 14
Loss = 9.8385e-02, PNorm = 78.3226, GNorm = 0.6389, lr_0 = 3.7248e-04
Loss = 1.2145e-01, PNorm = 78.3352, GNorm = 0.6055, lr_0 = 3.7222e-04
Loss = 1.0327e-01, PNorm = 78.3462, GNorm = 0.6351, lr_0 = 3.7197e-04
Loss = 1.2016e-01, PNorm = 78.3555, GNorm = 0.5285, lr_0 = 3.7171e-04
Loss = 1.1267e-01, PNorm = 78.3654, GNorm = 0.5887, lr_0 = 3.7146e-04
Loss = 1.0061e-01, PNorm = 78.3790, GNorm = 0.5403, lr_0 = 3.7120e-04
Loss = 1.1235e-01, PNorm = 78.3886, GNorm = 0.7507, lr_0 = 3.7095e-04
Loss = 1.0849e-01, PNorm = 78.4033, GNorm = 0.6551, lr_0 = 3.7070e-04
Loss = 1.1067e-01, PNorm = 78.4158, GNorm = 0.5832, lr_0 = 3.7044e-04
Loss = 1.0191e-01, PNorm = 78.4249, GNorm = 0.5548, lr_0 = 3.7019e-04
Loss = 9.8309e-02, PNorm = 78.4336, GNorm = 0.8623, lr_0 = 3.6993e-04
Loss = 1.1465e-01, PNorm = 78.4413, GNorm = 0.6978, lr_0 = 3.6968e-04
Loss = 1.1207e-01, PNorm = 78.4494, GNorm = 0.7016, lr_0 = 3.6943e-04
Loss = 1.2288e-01, PNorm = 78.4587, GNorm = 0.7303, lr_0 = 3.6917e-04
Loss = 1.1517e-01, PNorm = 78.4712, GNorm = 0.9499, lr_0 = 3.6892e-04
Loss = 1.1966e-01, PNorm = 78.4859, GNorm = 0.6846, lr_0 = 3.6867e-04
Loss = 1.1259e-01, PNorm = 78.4974, GNorm = 0.7570, lr_0 = 3.6842e-04
Loss = 9.2184e-02, PNorm = 78.5039, GNorm = 0.5066, lr_0 = 3.6816e-04
Loss = 1.0620e-01, PNorm = 78.5101, GNorm = 0.8191, lr_0 = 3.6791e-04
Loss = 1.0749e-01, PNorm = 78.5192, GNorm = 0.6064, lr_0 = 3.6766e-04
Loss = 9.8025e-02, PNorm = 78.5261, GNorm = 0.5106, lr_0 = 3.6741e-04
Loss = 1.1021e-01, PNorm = 78.5342, GNorm = 1.2631, lr_0 = 3.6716e-04
Loss = 1.1137e-01, PNorm = 78.5411, GNorm = 0.4812, lr_0 = 3.6690e-04
Loss = 1.0980e-01, PNorm = 78.5532, GNorm = 0.9696, lr_0 = 3.6665e-04
Loss = 1.1915e-01, PNorm = 78.5648, GNorm = 0.6490, lr_0 = 3.6640e-04
Loss = 1.2017e-01, PNorm = 78.5720, GNorm = 0.8529, lr_0 = 3.6615e-04
Loss = 1.1730e-01, PNorm = 78.5809, GNorm = 1.5947, lr_0 = 3.6590e-04
Loss = 1.3660e-01, PNorm = 78.5892, GNorm = 0.6801, lr_0 = 3.6565e-04
Loss = 1.4991e-01, PNorm = 78.6098, GNorm = 0.5430, lr_0 = 3.6540e-04
Loss = 1.1322e-01, PNorm = 78.6192, GNorm = 0.8283, lr_0 = 3.6515e-04
Loss = 1.0483e-01, PNorm = 78.6288, GNorm = 0.8417, lr_0 = 3.6490e-04
Loss = 1.0735e-01, PNorm = 78.6357, GNorm = 0.8166, lr_0 = 3.6465e-04
Loss = 1.2483e-01, PNorm = 78.6435, GNorm = 0.6284, lr_0 = 3.6440e-04
Loss = 1.1123e-01, PNorm = 78.6526, GNorm = 0.6537, lr_0 = 3.6415e-04
Loss = 1.2505e-01, PNorm = 78.6620, GNorm = 0.6907, lr_0 = 3.6390e-04
Loss = 1.2857e-01, PNorm = 78.6747, GNorm = 0.7957, lr_0 = 3.6365e-04
Loss = 9.7974e-02, PNorm = 78.6828, GNorm = 0.5019, lr_0 = 3.6340e-04
Loss = 1.1157e-01, PNorm = 78.6918, GNorm = 0.8580, lr_0 = 3.6315e-04
Loss = 9.9830e-02, PNorm = 78.7016, GNorm = 0.5459, lr_0 = 3.6290e-04
Loss = 1.0438e-01, PNorm = 78.7121, GNorm = 0.6489, lr_0 = 3.6266e-04
Loss = 1.1648e-01, PNorm = 78.7178, GNorm = 0.7635, lr_0 = 3.6241e-04
Loss = 1.0942e-01, PNorm = 78.7277, GNorm = 0.5310, lr_0 = 3.6216e-04
Loss = 1.2549e-01, PNorm = 78.7357, GNorm = 0.8148, lr_0 = 3.6191e-04
Loss = 1.1132e-01, PNorm = 78.7477, GNorm = 0.6228, lr_0 = 3.6166e-04
Loss = 1.2610e-01, PNorm = 78.7564, GNorm = 0.7958, lr_0 = 3.6141e-04
Loss = 1.1751e-01, PNorm = 78.7661, GNorm = 0.8264, lr_0 = 3.6117e-04
Loss = 1.1599e-01, PNorm = 78.7795, GNorm = 0.5545, lr_0 = 3.6092e-04
Loss = 1.2530e-01, PNorm = 78.7918, GNorm = 0.7746, lr_0 = 3.6067e-04
Loss = 1.0034e-01, PNorm = 78.8007, GNorm = 0.6085, lr_0 = 3.6043e-04
Loss = 1.2067e-01, PNorm = 78.8124, GNorm = 0.6163, lr_0 = 3.6018e-04
Loss = 1.0924e-01, PNorm = 78.8211, GNorm = 0.7127, lr_0 = 3.5993e-04
Loss = 1.0063e-01, PNorm = 78.8319, GNorm = 0.6341, lr_0 = 3.5969e-04
Loss = 1.2381e-01, PNorm = 78.8368, GNorm = 0.5829, lr_0 = 3.5944e-04
Loss = 1.0997e-01, PNorm = 78.8472, GNorm = 0.7430, lr_0 = 3.5919e-04
Loss = 1.0896e-01, PNorm = 78.8609, GNorm = 0.5782, lr_0 = 3.5895e-04
Loss = 1.0880e-01, PNorm = 78.8654, GNorm = 0.8501, lr_0 = 3.5870e-04
Loss = 1.2873e-01, PNorm = 78.8696, GNorm = 0.5860, lr_0 = 3.5845e-04
Loss = 1.1638e-01, PNorm = 78.8814, GNorm = 0.5891, lr_0 = 3.5821e-04
Loss = 9.6762e-02, PNorm = 78.8913, GNorm = 0.4598, lr_0 = 3.5796e-04
Loss = 1.1923e-01, PNorm = 78.8959, GNorm = 0.5476, lr_0 = 3.5772e-04
Loss = 1.2165e-01, PNorm = 78.9072, GNorm = 0.5590, lr_0 = 3.5747e-04
Loss = 1.2101e-01, PNorm = 78.9169, GNorm = 0.8016, lr_0 = 3.5723e-04
Loss = 1.3389e-01, PNorm = 78.9261, GNorm = 0.9577, lr_0 = 3.5698e-04
Loss = 1.1704e-01, PNorm = 78.9319, GNorm = 0.5582, lr_0 = 3.5674e-04
Loss = 1.1580e-01, PNorm = 78.9422, GNorm = 0.4700, lr_0 = 3.5650e-04
Loss = 1.3135e-01, PNorm = 78.9499, GNorm = 0.6913, lr_0 = 3.5625e-04
Loss = 1.2955e-01, PNorm = 78.9531, GNorm = 0.4172, lr_0 = 3.5601e-04
Loss = 1.1785e-01, PNorm = 78.9641, GNorm = 0.8439, lr_0 = 3.5576e-04
Loss = 1.1280e-01, PNorm = 78.9733, GNorm = 0.6792, lr_0 = 3.5552e-04
Loss = 1.1078e-01, PNorm = 78.9813, GNorm = 0.4930, lr_0 = 3.5528e-04
Loss = 1.2172e-01, PNorm = 78.9887, GNorm = 1.0491, lr_0 = 3.5503e-04
Loss = 1.0266e-01, PNorm = 78.9990, GNorm = 0.4878, lr_0 = 3.5479e-04
Loss = 1.0602e-01, PNorm = 79.0126, GNorm = 0.6196, lr_0 = 3.5455e-04
Loss = 1.2032e-01, PNorm = 79.0203, GNorm = 1.0394, lr_0 = 3.5430e-04
Loss = 1.0069e-01, PNorm = 79.0238, GNorm = 0.6290, lr_0 = 3.5406e-04
Loss = 1.1273e-01, PNorm = 79.0314, GNorm = 0.5125, lr_0 = 3.5382e-04
Loss = 1.1451e-01, PNorm = 79.0443, GNorm = 0.6352, lr_0 = 3.5358e-04
Loss = 1.2966e-01, PNorm = 79.0561, GNorm = 0.6870, lr_0 = 3.5333e-04
Loss = 1.0101e-01, PNorm = 79.0644, GNorm = 1.0113, lr_0 = 3.5309e-04
Loss = 1.0498e-01, PNorm = 79.0717, GNorm = 0.6976, lr_0 = 3.5285e-04
Loss = 1.1158e-01, PNorm = 79.0746, GNorm = 0.5771, lr_0 = 3.5261e-04
Loss = 1.1520e-01, PNorm = 79.0799, GNorm = 0.8498, lr_0 = 3.5237e-04
Loss = 1.0487e-01, PNorm = 79.0909, GNorm = 0.9892, lr_0 = 3.5212e-04
Loss = 1.1348e-01, PNorm = 79.0967, GNorm = 0.6683, lr_0 = 3.5188e-04
Loss = 9.5934e-02, PNorm = 79.0995, GNorm = 0.5125, lr_0 = 3.5164e-04
Loss = 1.0108e-01, PNorm = 79.1070, GNorm = 0.5747, lr_0 = 3.5140e-04
Loss = 1.0545e-01, PNorm = 79.1086, GNorm = 0.6800, lr_0 = 3.5116e-04
Loss = 1.3853e-01, PNorm = 79.1165, GNorm = 0.6975, lr_0 = 3.5092e-04
Loss = 1.0705e-01, PNorm = 79.1327, GNorm = 0.6321, lr_0 = 3.5068e-04
Loss = 1.1247e-01, PNorm = 79.1428, GNorm = 0.6121, lr_0 = 3.5044e-04
Loss = 1.1477e-01, PNorm = 79.1541, GNorm = 0.7126, lr_0 = 3.5020e-04
Loss = 1.1347e-01, PNorm = 79.1667, GNorm = 0.7258, lr_0 = 3.4996e-04
Loss = 1.1485e-01, PNorm = 79.1815, GNorm = 0.6990, lr_0 = 3.4972e-04
Loss = 1.2041e-01, PNorm = 79.1913, GNorm = 0.7985, lr_0 = 3.4948e-04
Loss = 1.0065e-01, PNorm = 79.1969, GNorm = 0.6654, lr_0 = 3.4924e-04
Loss = 1.1267e-01, PNorm = 79.2046, GNorm = 0.6340, lr_0 = 3.4900e-04
Loss = 1.0543e-01, PNorm = 79.2140, GNorm = 0.9683, lr_0 = 3.4876e-04
Loss = 1.2009e-01, PNorm = 79.2209, GNorm = 0.8536, lr_0 = 3.4852e-04
Loss = 1.1711e-01, PNorm = 79.2248, GNorm = 0.5040, lr_0 = 3.4828e-04
Loss = 1.0504e-01, PNorm = 79.2306, GNorm = 0.6554, lr_0 = 3.4805e-04
Loss = 1.1669e-01, PNorm = 79.2367, GNorm = 0.6304, lr_0 = 3.4781e-04
Loss = 1.1839e-01, PNorm = 79.2407, GNorm = 0.5802, lr_0 = 3.4757e-04
Loss = 1.0189e-01, PNorm = 79.2516, GNorm = 0.7401, lr_0 = 3.4733e-04
Loss = 1.0608e-01, PNorm = 79.2567, GNorm = 0.6611, lr_0 = 3.4709e-04
Loss = 1.2066e-01, PNorm = 79.2662, GNorm = 0.8436, lr_0 = 3.4686e-04
Loss = 1.1437e-01, PNorm = 79.2743, GNorm = 0.5503, lr_0 = 3.4662e-04
Loss = 1.2392e-01, PNorm = 79.2836, GNorm = 0.7146, lr_0 = 3.4638e-04
Loss = 1.0417e-01, PNorm = 79.2895, GNorm = 0.8810, lr_0 = 3.4614e-04
Loss = 1.0367e-01, PNorm = 79.2964, GNorm = 0.5749, lr_0 = 3.4591e-04
Loss = 1.1422e-01, PNorm = 79.3026, GNorm = 0.7050, lr_0 = 3.4567e-04
Loss = 9.0283e-02, PNorm = 79.3087, GNorm = 0.6477, lr_0 = 3.4543e-04
Loss = 1.0651e-01, PNorm = 79.3157, GNorm = 1.3265, lr_0 = 3.4520e-04
Loss = 1.0269e-01, PNorm = 79.3250, GNorm = 1.0198, lr_0 = 3.4496e-04
Loss = 1.0775e-01, PNorm = 79.3363, GNorm = 0.8356, lr_0 = 3.4472e-04
Loss = 1.0464e-01, PNorm = 79.3404, GNorm = 0.7816, lr_0 = 3.4449e-04
Loss = 1.1541e-01, PNorm = 79.3452, GNorm = 0.8251, lr_0 = 3.4425e-04
Loss = 1.1332e-01, PNorm = 79.3537, GNorm = 0.7917, lr_0 = 3.4402e-04
Loss = 1.1437e-01, PNorm = 79.3543, GNorm = 0.6178, lr_0 = 3.4378e-04
Loss = 1.2430e-01, PNorm = 79.3602, GNorm = 0.8099, lr_0 = 3.4354e-04
Loss = 9.4936e-02, PNorm = 79.3666, GNorm = 1.0658, lr_0 = 3.4331e-04
Validation mae = 0.229974
Epoch 15
Loss = 1.1038e-01, PNorm = 79.3742, GNorm = 0.6775, lr_0 = 3.4307e-04
Loss = 1.2153e-01, PNorm = 79.3804, GNorm = 0.7490, lr_0 = 3.4284e-04
Loss = 1.0077e-01, PNorm = 79.3895, GNorm = 0.6548, lr_0 = 3.4260e-04
Loss = 1.0677e-01, PNorm = 79.3991, GNorm = 0.6200, lr_0 = 3.4237e-04
Loss = 1.0553e-01, PNorm = 79.4085, GNorm = 0.7355, lr_0 = 3.4213e-04
Loss = 1.1136e-01, PNorm = 79.4203, GNorm = 0.6907, lr_0 = 3.4190e-04
Loss = 9.5357e-02, PNorm = 79.4311, GNorm = 0.8067, lr_0 = 3.4167e-04
Loss = 1.0322e-01, PNorm = 79.4439, GNorm = 0.5629, lr_0 = 3.4143e-04
Loss = 1.1624e-01, PNorm = 79.4527, GNorm = 0.9063, lr_0 = 3.4120e-04
Loss = 1.0912e-01, PNorm = 79.4573, GNorm = 0.9110, lr_0 = 3.4096e-04
Loss = 1.0447e-01, PNorm = 79.4642, GNorm = 0.6398, lr_0 = 3.4073e-04
Loss = 9.6932e-02, PNorm = 79.4760, GNorm = 0.6053, lr_0 = 3.4050e-04
Loss = 1.0140e-01, PNorm = 79.4793, GNorm = 0.7462, lr_0 = 3.4026e-04
Loss = 1.0965e-01, PNorm = 79.4856, GNorm = 0.8230, lr_0 = 3.4003e-04
Loss = 1.0755e-01, PNorm = 79.4929, GNorm = 0.6681, lr_0 = 3.3980e-04
Loss = 1.1964e-01, PNorm = 79.4995, GNorm = 0.7281, lr_0 = 3.3956e-04
Loss = 1.0863e-01, PNorm = 79.5058, GNorm = 0.7614, lr_0 = 3.3933e-04
Loss = 1.1237e-01, PNorm = 79.5144, GNorm = 0.6019, lr_0 = 3.3910e-04
Loss = 1.1759e-01, PNorm = 79.5230, GNorm = 0.6577, lr_0 = 3.3887e-04
Loss = 1.0812e-01, PNorm = 79.5306, GNorm = 0.7331, lr_0 = 3.3864e-04
Loss = 1.0047e-01, PNorm = 79.5409, GNorm = 0.5450, lr_0 = 3.3840e-04
Loss = 9.4413e-02, PNorm = 79.5516, GNorm = 0.6422, lr_0 = 3.3817e-04
Loss = 1.1878e-01, PNorm = 79.5574, GNorm = 0.6850, lr_0 = 3.3794e-04
Loss = 1.0900e-01, PNorm = 79.5620, GNorm = 1.1540, lr_0 = 3.3771e-04
Loss = 9.7942e-02, PNorm = 79.5700, GNorm = 0.7275, lr_0 = 3.3748e-04
Loss = 9.2982e-02, PNorm = 79.5794, GNorm = 0.5823, lr_0 = 3.3725e-04
Loss = 9.8623e-02, PNorm = 79.5873, GNorm = 0.7337, lr_0 = 3.3701e-04
Loss = 1.0568e-01, PNorm = 79.5922, GNorm = 0.5381, lr_0 = 3.3678e-04
Loss = 1.0815e-01, PNorm = 79.5967, GNorm = 0.7405, lr_0 = 3.3655e-04
Loss = 1.0784e-01, PNorm = 79.6060, GNorm = 0.6690, lr_0 = 3.3632e-04
Loss = 1.0371e-01, PNorm = 79.6114, GNorm = 1.0871, lr_0 = 3.3609e-04
Loss = 8.9815e-02, PNorm = 79.6190, GNorm = 0.8419, lr_0 = 3.3586e-04
Loss = 9.8856e-02, PNorm = 79.6290, GNorm = 0.7931, lr_0 = 3.3563e-04
Loss = 1.1554e-01, PNorm = 79.6388, GNorm = 0.6505, lr_0 = 3.3540e-04
Loss = 1.0864e-01, PNorm = 79.6478, GNorm = 0.6714, lr_0 = 3.3517e-04
Loss = 1.0349e-01, PNorm = 79.6575, GNorm = 0.5798, lr_0 = 3.3494e-04
Loss = 9.9780e-02, PNorm = 79.6630, GNorm = 0.6505, lr_0 = 3.3471e-04
Loss = 1.1906e-01, PNorm = 79.6708, GNorm = 0.7706, lr_0 = 3.3448e-04
Loss = 1.0041e-01, PNorm = 79.6768, GNorm = 0.6447, lr_0 = 3.3425e-04
Loss = 1.1184e-01, PNorm = 79.6837, GNorm = 0.5966, lr_0 = 3.3403e-04
Loss = 1.0902e-01, PNorm = 79.6955, GNorm = 0.7111, lr_0 = 3.3380e-04
Loss = 1.1415e-01, PNorm = 79.7058, GNorm = 0.6735, lr_0 = 3.3357e-04
Loss = 9.8423e-02, PNorm = 79.7119, GNorm = 1.2679, lr_0 = 3.3334e-04
Loss = 1.2167e-01, PNorm = 79.7220, GNorm = 0.6344, lr_0 = 3.3311e-04
Loss = 1.0148e-01, PNorm = 79.7293, GNorm = 0.7550, lr_0 = 3.3288e-04
Loss = 1.2002e-01, PNorm = 79.7360, GNorm = 0.5827, lr_0 = 3.3265e-04
Loss = 1.1436e-01, PNorm = 79.7456, GNorm = 0.9529, lr_0 = 3.3243e-04
Loss = 1.1407e-01, PNorm = 79.7534, GNorm = 0.8588, lr_0 = 3.3220e-04
Loss = 1.2666e-01, PNorm = 79.7647, GNorm = 0.8269, lr_0 = 3.3197e-04
Loss = 1.0627e-01, PNorm = 79.7723, GNorm = 0.6642, lr_0 = 3.3174e-04
Loss = 1.0493e-01, PNorm = 79.7771, GNorm = 0.6649, lr_0 = 3.3152e-04
Loss = 1.1078e-01, PNorm = 79.7867, GNorm = 0.8765, lr_0 = 3.3129e-04
Loss = 1.0801e-01, PNorm = 79.7939, GNorm = 0.6786, lr_0 = 3.3106e-04
Loss = 1.0616e-01, PNorm = 79.8073, GNorm = 0.5917, lr_0 = 3.3084e-04
Loss = 1.0292e-01, PNorm = 79.8142, GNorm = 0.5383, lr_0 = 3.3061e-04
Loss = 1.0460e-01, PNorm = 79.8188, GNorm = 0.7800, lr_0 = 3.3038e-04
Loss = 1.2262e-01, PNorm = 79.8222, GNorm = 0.8055, lr_0 = 3.3016e-04
Loss = 1.0104e-01, PNorm = 79.8280, GNorm = 0.9040, lr_0 = 3.2993e-04
Loss = 1.0274e-01, PNorm = 79.8360, GNorm = 0.4641, lr_0 = 3.2970e-04
Loss = 1.1016e-01, PNorm = 79.8472, GNorm = 0.8579, lr_0 = 3.2948e-04
Loss = 1.0369e-01, PNorm = 79.8548, GNorm = 0.6792, lr_0 = 3.2925e-04
Loss = 1.1950e-01, PNorm = 79.8571, GNorm = 0.5515, lr_0 = 3.2903e-04
Loss = 8.9162e-02, PNorm = 79.8624, GNorm = 0.5377, lr_0 = 3.2880e-04
Loss = 1.0499e-01, PNorm = 79.8718, GNorm = 1.0018, lr_0 = 3.2858e-04
Loss = 9.8181e-02, PNorm = 79.8734, GNorm = 0.4859, lr_0 = 3.2835e-04
Loss = 1.1332e-01, PNorm = 79.8801, GNorm = 0.8388, lr_0 = 3.2813e-04
Loss = 1.1477e-01, PNorm = 79.8880, GNorm = 0.7050, lr_0 = 3.2790e-04
Loss = 1.0253e-01, PNorm = 79.8949, GNorm = 0.5927, lr_0 = 3.2768e-04
Loss = 1.1299e-01, PNorm = 79.9021, GNorm = 1.0402, lr_0 = 3.2745e-04
Loss = 1.2820e-01, PNorm = 79.9108, GNorm = 0.6306, lr_0 = 3.2723e-04
Loss = 1.0843e-01, PNorm = 79.9205, GNorm = 0.7671, lr_0 = 3.2700e-04
Loss = 1.1275e-01, PNorm = 79.9255, GNorm = 0.4650, lr_0 = 3.2678e-04
Loss = 9.8415e-02, PNorm = 79.9369, GNorm = 0.4900, lr_0 = 3.2656e-04
Loss = 8.0458e-02, PNorm = 79.9485, GNorm = 0.5354, lr_0 = 3.2633e-04
Loss = 1.0578e-01, PNorm = 79.9546, GNorm = 0.4564, lr_0 = 3.2611e-04
Loss = 1.1526e-01, PNorm = 79.9626, GNorm = 0.6376, lr_0 = 3.2589e-04
Loss = 1.1426e-01, PNorm = 79.9668, GNorm = 0.6831, lr_0 = 3.2566e-04
Loss = 9.9565e-02, PNorm = 79.9732, GNorm = 0.7790, lr_0 = 3.2544e-04
Loss = 8.9876e-02, PNorm = 79.9785, GNorm = 0.6165, lr_0 = 3.2522e-04
Loss = 9.8789e-02, PNorm = 79.9811, GNorm = 0.6592, lr_0 = 3.2499e-04
Loss = 1.2870e-01, PNorm = 79.9897, GNorm = 0.5539, lr_0 = 3.2477e-04
Loss = 1.2860e-01, PNorm = 79.9992, GNorm = 0.7835, lr_0 = 3.2455e-04
Loss = 1.1715e-01, PNorm = 80.0092, GNorm = 0.7164, lr_0 = 3.2433e-04
Loss = 1.1237e-01, PNorm = 80.0159, GNorm = 0.6065, lr_0 = 3.2410e-04
Loss = 1.0754e-01, PNorm = 80.0235, GNorm = 0.6426, lr_0 = 3.2388e-04
Loss = 1.0038e-01, PNorm = 80.0336, GNorm = 0.5630, lr_0 = 3.2366e-04
Loss = 1.0844e-01, PNorm = 80.0374, GNorm = 0.7477, lr_0 = 3.2344e-04
Loss = 1.0332e-01, PNorm = 80.0453, GNorm = 0.6127, lr_0 = 3.2322e-04
Loss = 9.9671e-02, PNorm = 80.0587, GNorm = 0.8178, lr_0 = 3.2300e-04
Loss = 1.1506e-01, PNorm = 80.0643, GNorm = 0.7785, lr_0 = 3.2277e-04
Loss = 1.0513e-01, PNorm = 80.0709, GNorm = 0.5650, lr_0 = 3.2255e-04
Loss = 1.0304e-01, PNorm = 80.0750, GNorm = 0.7627, lr_0 = 3.2233e-04
Loss = 1.0149e-01, PNorm = 80.0823, GNorm = 0.5527, lr_0 = 3.2211e-04
Loss = 1.1145e-01, PNorm = 80.0899, GNorm = 0.9960, lr_0 = 3.2189e-04
Loss = 1.1456e-01, PNorm = 80.0976, GNorm = 0.8838, lr_0 = 3.2167e-04
Loss = 1.0447e-01, PNorm = 80.1055, GNorm = 0.7197, lr_0 = 3.2145e-04
Loss = 1.1742e-01, PNorm = 80.1123, GNorm = 0.8151, lr_0 = 3.2123e-04
Loss = 1.0506e-01, PNorm = 80.1195, GNorm = 0.6154, lr_0 = 3.2101e-04
Loss = 1.0807e-01, PNorm = 80.1260, GNorm = 0.6063, lr_0 = 3.2079e-04
Loss = 1.1138e-01, PNorm = 80.1305, GNorm = 0.6712, lr_0 = 3.2057e-04
Loss = 9.5860e-02, PNorm = 80.1365, GNorm = 0.6103, lr_0 = 3.2035e-04
Loss = 1.1365e-01, PNorm = 80.1429, GNorm = 0.6861, lr_0 = 3.2013e-04
Loss = 1.0916e-01, PNorm = 80.1461, GNorm = 0.5471, lr_0 = 3.1991e-04
Loss = 1.0443e-01, PNorm = 80.1566, GNorm = 0.5988, lr_0 = 3.1969e-04
Loss = 1.1842e-01, PNorm = 80.1617, GNorm = 0.7913, lr_0 = 3.1947e-04
Loss = 1.1977e-01, PNorm = 80.1680, GNorm = 1.1148, lr_0 = 3.1925e-04
Loss = 1.2156e-01, PNorm = 80.1789, GNorm = 0.6594, lr_0 = 3.1904e-04
Loss = 1.1258e-01, PNorm = 80.1849, GNorm = 0.8126, lr_0 = 3.1882e-04
Loss = 1.2066e-01, PNorm = 80.1930, GNorm = 0.7657, lr_0 = 3.1860e-04
Loss = 1.1197e-01, PNorm = 80.1969, GNorm = 0.8496, lr_0 = 3.1838e-04
Loss = 1.1255e-01, PNorm = 80.2048, GNorm = 0.5348, lr_0 = 3.1816e-04
Loss = 1.1322e-01, PNorm = 80.2119, GNorm = 1.2957, lr_0 = 3.1794e-04
Loss = 1.1442e-01, PNorm = 80.2217, GNorm = 1.1538, lr_0 = 3.1773e-04
Loss = 1.0963e-01, PNorm = 80.2321, GNorm = 0.8735, lr_0 = 3.1751e-04
Loss = 9.9196e-02, PNorm = 80.2397, GNorm = 0.7005, lr_0 = 3.1729e-04
Loss = 1.0713e-01, PNorm = 80.2526, GNorm = 0.6432, lr_0 = 3.1707e-04
Loss = 1.0467e-01, PNorm = 80.2615, GNorm = 0.6856, lr_0 = 3.1686e-04
Loss = 1.2177e-01, PNorm = 80.2719, GNorm = 0.7851, lr_0 = 3.1664e-04
Loss = 1.0891e-01, PNorm = 80.2817, GNorm = 0.7193, lr_0 = 3.1642e-04
Loss = 1.1175e-01, PNorm = 80.2872, GNorm = 0.8018, lr_0 = 3.1621e-04
Validation mae = 0.231258
Epoch 16
Loss = 9.8989e-02, PNorm = 80.2965, GNorm = 0.5816, lr_0 = 3.1599e-04
Loss = 9.5411e-02, PNorm = 80.3041, GNorm = 0.9704, lr_0 = 3.1577e-04
Loss = 9.7431e-02, PNorm = 80.3095, GNorm = 0.9619, lr_0 = 3.1556e-04
Loss = 1.0804e-01, PNorm = 80.3204, GNorm = 0.6257, lr_0 = 3.1534e-04
Loss = 9.3928e-02, PNorm = 80.3276, GNorm = 0.6029, lr_0 = 3.1512e-04
Loss = 9.8476e-02, PNorm = 80.3333, GNorm = 0.4996, lr_0 = 3.1491e-04
Loss = 9.8692e-02, PNorm = 80.3371, GNorm = 0.6675, lr_0 = 3.1469e-04
Loss = 1.1603e-01, PNorm = 80.3399, GNorm = 0.7360, lr_0 = 3.1448e-04
Loss = 9.3533e-02, PNorm = 80.3476, GNorm = 0.5527, lr_0 = 3.1426e-04
Loss = 1.0364e-01, PNorm = 80.3579, GNorm = 0.5856, lr_0 = 3.1405e-04
Loss = 1.1701e-01, PNorm = 80.3708, GNorm = 0.8305, lr_0 = 3.1383e-04
Loss = 9.9836e-02, PNorm = 80.3747, GNorm = 0.6576, lr_0 = 3.1362e-04
Loss = 1.0456e-01, PNorm = 80.3837, GNorm = 0.6913, lr_0 = 3.1340e-04
Loss = 9.9656e-02, PNorm = 80.3937, GNorm = 0.9727, lr_0 = 3.1319e-04
Loss = 9.9630e-02, PNorm = 80.4019, GNorm = 0.5282, lr_0 = 3.1297e-04
Loss = 9.5309e-02, PNorm = 80.4088, GNorm = 0.6704, lr_0 = 3.1276e-04
Loss = 9.7624e-02, PNorm = 80.4141, GNorm = 0.7091, lr_0 = 3.1254e-04
Loss = 1.0045e-01, PNorm = 80.4174, GNorm = 0.5726, lr_0 = 3.1233e-04
Loss = 9.9790e-02, PNorm = 80.4238, GNorm = 0.7365, lr_0 = 3.1212e-04
Loss = 1.1503e-01, PNorm = 80.4271, GNorm = 0.7163, lr_0 = 3.1190e-04
Loss = 1.1739e-01, PNorm = 80.4333, GNorm = 0.6330, lr_0 = 3.1169e-04
Loss = 9.2082e-02, PNorm = 80.4404, GNorm = 0.5978, lr_0 = 3.1147e-04
Loss = 1.0198e-01, PNorm = 80.4467, GNorm = 0.6823, lr_0 = 3.1126e-04
Loss = 1.0006e-01, PNorm = 80.4543, GNorm = 0.6501, lr_0 = 3.1105e-04
Loss = 1.1362e-01, PNorm = 80.4616, GNorm = 0.7113, lr_0 = 3.1083e-04
Loss = 1.0332e-01, PNorm = 80.4649, GNorm = 0.7928, lr_0 = 3.1062e-04
Loss = 1.0792e-01, PNorm = 80.4706, GNorm = 0.6739, lr_0 = 3.1041e-04
Loss = 1.0397e-01, PNorm = 80.4809, GNorm = 0.6819, lr_0 = 3.1020e-04
Loss = 1.0552e-01, PNorm = 80.4871, GNorm = 0.7493, lr_0 = 3.0998e-04
Loss = 1.0318e-01, PNorm = 80.4956, GNorm = 1.3443, lr_0 = 3.0977e-04
Loss = 1.0348e-01, PNorm = 80.5017, GNorm = 0.7642, lr_0 = 3.0956e-04
Loss = 8.6361e-02, PNorm = 80.5132, GNorm = 0.4776, lr_0 = 3.0935e-04
Loss = 1.0521e-01, PNorm = 80.5221, GNorm = 0.6874, lr_0 = 3.0914e-04
Loss = 1.1261e-01, PNorm = 80.5238, GNorm = 0.6365, lr_0 = 3.0892e-04
Loss = 9.5897e-02, PNorm = 80.5294, GNorm = 0.6805, lr_0 = 3.0871e-04
Loss = 1.0562e-01, PNorm = 80.5346, GNorm = 0.6646, lr_0 = 3.0850e-04
Loss = 1.0457e-01, PNorm = 80.5410, GNorm = 0.4197, lr_0 = 3.0829e-04
Loss = 1.0376e-01, PNorm = 80.5483, GNorm = 0.8047, lr_0 = 3.0808e-04
Loss = 9.3161e-02, PNorm = 80.5539, GNorm = 0.6004, lr_0 = 3.0787e-04
Loss = 9.8179e-02, PNorm = 80.5612, GNorm = 0.7991, lr_0 = 3.0766e-04
Loss = 1.0148e-01, PNorm = 80.5685, GNorm = 0.8432, lr_0 = 3.0745e-04
Loss = 1.1205e-01, PNorm = 80.5754, GNorm = 0.6617, lr_0 = 3.0723e-04
Loss = 1.0794e-01, PNorm = 80.5829, GNorm = 0.6691, lr_0 = 3.0702e-04
Loss = 1.1690e-01, PNorm = 80.5921, GNorm = 0.7938, lr_0 = 3.0681e-04
Loss = 1.0225e-01, PNorm = 80.6002, GNorm = 0.6582, lr_0 = 3.0660e-04
Loss = 1.0250e-01, PNorm = 80.6063, GNorm = 0.4391, lr_0 = 3.0639e-04
Loss = 1.0671e-01, PNorm = 80.6135, GNorm = 1.2977, lr_0 = 3.0618e-04
Loss = 1.0091e-01, PNorm = 80.6165, GNorm = 0.8375, lr_0 = 3.0597e-04
Loss = 9.6002e-02, PNorm = 80.6178, GNorm = 0.5042, lr_0 = 3.0576e-04
Loss = 1.0263e-01, PNorm = 80.6275, GNorm = 0.6510, lr_0 = 3.0555e-04
Loss = 1.1004e-01, PNorm = 80.6349, GNorm = 0.9916, lr_0 = 3.0535e-04
Loss = 8.6193e-02, PNorm = 80.6385, GNorm = 0.5709, lr_0 = 3.0514e-04
Loss = 1.0588e-01, PNorm = 80.6445, GNorm = 0.5036, lr_0 = 3.0493e-04
Loss = 1.0960e-01, PNorm = 80.6520, GNorm = 0.7892, lr_0 = 3.0472e-04
Loss = 1.1178e-01, PNorm = 80.6584, GNorm = 0.6476, lr_0 = 3.0451e-04
Loss = 8.8912e-02, PNorm = 80.6636, GNorm = 0.7352, lr_0 = 3.0430e-04
Loss = 1.3492e-01, PNorm = 80.6671, GNorm = 1.0135, lr_0 = 3.0409e-04
Loss = 1.0729e-01, PNorm = 80.6757, GNorm = 0.6019, lr_0 = 3.0388e-04
Loss = 1.0063e-01, PNorm = 80.6799, GNorm = 0.7908, lr_0 = 3.0368e-04
Loss = 1.0392e-01, PNorm = 80.6856, GNorm = 0.6679, lr_0 = 3.0347e-04
Loss = 1.0881e-01, PNorm = 80.6936, GNorm = 1.0117, lr_0 = 3.0326e-04
Loss = 1.0562e-01, PNorm = 80.7002, GNorm = 0.6325, lr_0 = 3.0305e-04
Loss = 1.1649e-01, PNorm = 80.7065, GNorm = 1.1251, lr_0 = 3.0284e-04
Loss = 9.3781e-02, PNorm = 80.7133, GNorm = 0.6779, lr_0 = 3.0264e-04
Loss = 9.7228e-02, PNorm = 80.7178, GNorm = 0.7425, lr_0 = 3.0243e-04
Loss = 1.1889e-01, PNorm = 80.7249, GNorm = 0.7587, lr_0 = 3.0222e-04
Loss = 8.9606e-02, PNorm = 80.7308, GNorm = 0.8716, lr_0 = 3.0202e-04
Loss = 8.8568e-02, PNorm = 80.7377, GNorm = 0.6773, lr_0 = 3.0181e-04
Loss = 9.2471e-02, PNorm = 80.7386, GNorm = 0.5309, lr_0 = 3.0160e-04
Loss = 1.1022e-01, PNorm = 80.7417, GNorm = 0.4996, lr_0 = 3.0140e-04
Loss = 1.0665e-01, PNorm = 80.7468, GNorm = 0.7408, lr_0 = 3.0119e-04
Loss = 9.9997e-02, PNorm = 80.7532, GNorm = 0.8816, lr_0 = 3.0098e-04
Loss = 1.0206e-01, PNorm = 80.7572, GNorm = 0.5977, lr_0 = 3.0078e-04
Loss = 1.0226e-01, PNorm = 80.7635, GNorm = 0.5154, lr_0 = 3.0057e-04
Loss = 1.0026e-01, PNorm = 80.7713, GNorm = 0.9188, lr_0 = 3.0036e-04
Loss = 1.2067e-01, PNorm = 80.7820, GNorm = 0.7353, lr_0 = 3.0016e-04
Loss = 1.0320e-01, PNorm = 80.7885, GNorm = 0.7585, lr_0 = 2.9995e-04
Loss = 1.0218e-01, PNorm = 80.7957, GNorm = 0.7171, lr_0 = 2.9975e-04
Loss = 1.1446e-01, PNorm = 80.8003, GNorm = 0.7399, lr_0 = 2.9954e-04
Loss = 1.1035e-01, PNorm = 80.8064, GNorm = 0.5624, lr_0 = 2.9934e-04
Loss = 1.2066e-01, PNorm = 80.8096, GNorm = 0.7399, lr_0 = 2.9913e-04
Loss = 1.0072e-01, PNorm = 80.8170, GNorm = 0.5143, lr_0 = 2.9893e-04
Loss = 1.0344e-01, PNorm = 80.8237, GNorm = 0.6203, lr_0 = 2.9872e-04
Loss = 1.0464e-01, PNorm = 80.8341, GNorm = 0.5915, lr_0 = 2.9852e-04
Loss = 1.0896e-01, PNorm = 80.8390, GNorm = 0.5961, lr_0 = 2.9831e-04
Loss = 1.0174e-01, PNorm = 80.8473, GNorm = 0.6302, lr_0 = 2.9811e-04
Loss = 1.0347e-01, PNorm = 80.8560, GNorm = 0.9207, lr_0 = 2.9790e-04
Loss = 1.0557e-01, PNorm = 80.8629, GNorm = 0.6429, lr_0 = 2.9770e-04
Loss = 1.1161e-01, PNorm = 80.8692, GNorm = 1.1825, lr_0 = 2.9750e-04
Loss = 9.9220e-02, PNorm = 80.8759, GNorm = 0.9232, lr_0 = 2.9729e-04
Loss = 9.9008e-02, PNorm = 80.8791, GNorm = 0.7610, lr_0 = 2.9709e-04
Loss = 9.9195e-02, PNorm = 80.8825, GNorm = 0.9115, lr_0 = 2.9689e-04
Loss = 9.9117e-02, PNorm = 80.8878, GNorm = 0.6359, lr_0 = 2.9668e-04
Loss = 9.6822e-02, PNorm = 80.8915, GNorm = 0.6372, lr_0 = 2.9648e-04
Loss = 1.0678e-01, PNorm = 80.9010, GNorm = 0.7348, lr_0 = 2.9628e-04
Loss = 1.0844e-01, PNorm = 80.9071, GNorm = 0.7242, lr_0 = 2.9607e-04
Loss = 1.0345e-01, PNorm = 80.9159, GNorm = 0.5947, lr_0 = 2.9587e-04
Loss = 9.8556e-02, PNorm = 80.9220, GNorm = 0.6341, lr_0 = 2.9567e-04
Loss = 1.1741e-01, PNorm = 80.9283, GNorm = 0.7255, lr_0 = 2.9546e-04
Loss = 9.9180e-02, PNorm = 80.9361, GNorm = 0.6605, lr_0 = 2.9526e-04
Loss = 9.3507e-02, PNorm = 80.9433, GNorm = 0.4375, lr_0 = 2.9506e-04
Loss = 9.9998e-02, PNorm = 80.9478, GNorm = 0.5750, lr_0 = 2.9486e-04
Loss = 1.0682e-01, PNorm = 80.9508, GNorm = 0.8061, lr_0 = 2.9466e-04
Loss = 1.1702e-01, PNorm = 80.9566, GNorm = 0.6169, lr_0 = 2.9445e-04
Loss = 1.2176e-01, PNorm = 80.9607, GNorm = 0.6613, lr_0 = 2.9425e-04
Loss = 1.0210e-01, PNorm = 80.9658, GNorm = 1.2871, lr_0 = 2.9405e-04
Loss = 1.0773e-01, PNorm = 80.9717, GNorm = 0.5721, lr_0 = 2.9385e-04
Loss = 9.0114e-02, PNorm = 80.9780, GNorm = 0.9192, lr_0 = 2.9365e-04
Loss = 1.1956e-01, PNorm = 80.9855, GNorm = 0.8232, lr_0 = 2.9345e-04
Loss = 9.1203e-02, PNorm = 80.9963, GNorm = 1.0235, lr_0 = 2.9325e-04
Loss = 1.0951e-01, PNorm = 81.0041, GNorm = 0.8384, lr_0 = 2.9305e-04
Loss = 1.0588e-01, PNorm = 81.0084, GNorm = 0.6965, lr_0 = 2.9284e-04
Loss = 9.3116e-02, PNorm = 81.0155, GNorm = 0.7905, lr_0 = 2.9264e-04
Loss = 1.0606e-01, PNorm = 81.0205, GNorm = 0.5545, lr_0 = 2.9244e-04
Loss = 1.0048e-01, PNorm = 81.0231, GNorm = 0.6316, lr_0 = 2.9224e-04
Loss = 9.7931e-02, PNorm = 81.0272, GNorm = 0.7110, lr_0 = 2.9204e-04
Loss = 9.9003e-02, PNorm = 81.0315, GNorm = 0.6221, lr_0 = 2.9184e-04
Loss = 1.1997e-01, PNorm = 81.0365, GNorm = 0.8439, lr_0 = 2.9164e-04
Loss = 8.0981e-02, PNorm = 81.0415, GNorm = 0.6841, lr_0 = 2.9144e-04
Loss = 1.1120e-01, PNorm = 81.0450, GNorm = 0.6980, lr_0 = 2.9124e-04
Validation mae = 0.227710
Epoch 17
Loss = 9.5257e-02, PNorm = 81.0491, GNorm = 0.6294, lr_0 = 2.9104e-04
Loss = 8.6384e-02, PNorm = 81.0553, GNorm = 0.5413, lr_0 = 2.9084e-04
Loss = 1.0096e-01, PNorm = 81.0633, GNorm = 0.4797, lr_0 = 2.9065e-04
Loss = 1.0954e-01, PNorm = 81.0680, GNorm = 0.8058, lr_0 = 2.9045e-04
Loss = 1.0132e-01, PNorm = 81.0785, GNorm = 0.6565, lr_0 = 2.9025e-04
Loss = 1.0215e-01, PNorm = 81.0862, GNorm = 0.8383, lr_0 = 2.9005e-04
Loss = 9.7631e-02, PNorm = 81.0899, GNorm = 0.7062, lr_0 = 2.8985e-04
Loss = 9.3671e-02, PNorm = 81.1015, GNorm = 0.8823, lr_0 = 2.8965e-04
Loss = 1.0429e-01, PNorm = 81.1096, GNorm = 0.8981, lr_0 = 2.8945e-04
Loss = 8.4330e-02, PNorm = 81.1171, GNorm = 0.9483, lr_0 = 2.8925e-04
Loss = 9.8885e-02, PNorm = 81.1233, GNorm = 0.9382, lr_0 = 2.8906e-04
Loss = 9.4546e-02, PNorm = 81.1285, GNorm = 0.6383, lr_0 = 2.8886e-04
Loss = 9.7418e-02, PNorm = 81.1307, GNorm = 0.4981, lr_0 = 2.8866e-04
Loss = 1.2404e-01, PNorm = 81.1369, GNorm = 0.5786, lr_0 = 2.8846e-04
Loss = 9.7508e-02, PNorm = 81.1455, GNorm = 0.9806, lr_0 = 2.8826e-04
Loss = 9.6781e-02, PNorm = 81.1526, GNorm = 0.6863, lr_0 = 2.8807e-04
Loss = 9.3255e-02, PNorm = 81.1554, GNorm = 0.5126, lr_0 = 2.8787e-04
Loss = 9.1069e-02, PNorm = 81.1633, GNorm = 0.5716, lr_0 = 2.8767e-04
Loss = 1.1110e-01, PNorm = 81.1716, GNorm = 0.7261, lr_0 = 2.8748e-04
Loss = 9.9235e-02, PNorm = 81.1799, GNorm = 0.9240, lr_0 = 2.8728e-04
Loss = 1.0203e-01, PNorm = 81.1825, GNorm = 0.5212, lr_0 = 2.8708e-04
Loss = 1.1788e-01, PNorm = 81.1903, GNorm = 0.7665, lr_0 = 2.8689e-04
Loss = 1.1509e-01, PNorm = 81.1959, GNorm = 0.5718, lr_0 = 2.8669e-04
Loss = 1.0199e-01, PNorm = 81.2000, GNorm = 0.8468, lr_0 = 2.8649e-04
Loss = 1.0707e-01, PNorm = 81.2066, GNorm = 0.7801, lr_0 = 2.8630e-04
Loss = 1.2756e-01, PNorm = 81.2184, GNorm = 1.4415, lr_0 = 2.8610e-04
Loss = 9.1636e-02, PNorm = 81.2276, GNorm = 0.6697, lr_0 = 2.8590e-04
Loss = 9.5341e-02, PNorm = 81.2327, GNorm = 0.7688, lr_0 = 2.8571e-04
Loss = 1.0481e-01, PNorm = 81.2388, GNorm = 0.5220, lr_0 = 2.8551e-04
Loss = 8.7799e-02, PNorm = 81.2468, GNorm = 0.6324, lr_0 = 2.8532e-04
Loss = 9.1542e-02, PNorm = 81.2519, GNorm = 0.5086, lr_0 = 2.8512e-04
Loss = 1.0912e-01, PNorm = 81.2584, GNorm = 0.7069, lr_0 = 2.8493e-04
Loss = 9.9610e-02, PNorm = 81.2642, GNorm = 0.5579, lr_0 = 2.8473e-04
Loss = 9.7855e-02, PNorm = 81.2700, GNorm = 0.5731, lr_0 = 2.8454e-04
Loss = 1.0824e-01, PNorm = 81.2792, GNorm = 1.3031, lr_0 = 2.8434e-04
Loss = 1.1418e-01, PNorm = 81.2841, GNorm = 0.7392, lr_0 = 2.8415e-04
Loss = 1.0693e-01, PNorm = 81.2904, GNorm = 0.7443, lr_0 = 2.8395e-04
Loss = 9.9600e-02, PNorm = 81.3021, GNorm = 0.6258, lr_0 = 2.8376e-04
Loss = 9.5443e-02, PNorm = 81.3105, GNorm = 0.8313, lr_0 = 2.8356e-04
Loss = 1.0949e-01, PNorm = 81.3167, GNorm = 0.7548, lr_0 = 2.8337e-04
Loss = 1.0137e-01, PNorm = 81.3230, GNorm = 0.5974, lr_0 = 2.8317e-04
Loss = 1.1396e-01, PNorm = 81.3289, GNorm = 0.6757, lr_0 = 2.8298e-04
Loss = 1.2049e-01, PNorm = 81.3374, GNorm = 0.6659, lr_0 = 2.8279e-04
Loss = 9.0243e-02, PNorm = 81.3436, GNorm = 0.6347, lr_0 = 2.8259e-04
Loss = 1.0009e-01, PNorm = 81.3494, GNorm = 0.7631, lr_0 = 2.8240e-04
Loss = 9.6212e-02, PNorm = 81.3556, GNorm = 0.7576, lr_0 = 2.8221e-04
Loss = 9.6099e-02, PNorm = 81.3611, GNorm = 0.6170, lr_0 = 2.8201e-04
Loss = 9.7404e-02, PNorm = 81.3626, GNorm = 0.5925, lr_0 = 2.8182e-04
Loss = 9.3630e-02, PNorm = 81.3702, GNorm = 0.6326, lr_0 = 2.8163e-04
Loss = 9.3465e-02, PNorm = 81.3762, GNorm = 0.5155, lr_0 = 2.8143e-04
Loss = 8.8498e-02, PNorm = 81.3816, GNorm = 0.5735, lr_0 = 2.8124e-04
Loss = 9.9051e-02, PNorm = 81.3866, GNorm = 0.7716, lr_0 = 2.8105e-04
Loss = 9.2713e-02, PNorm = 81.3906, GNorm = 0.7394, lr_0 = 2.8085e-04
Loss = 8.2182e-02, PNorm = 81.3937, GNorm = 0.6890, lr_0 = 2.8066e-04
Loss = 9.3858e-02, PNorm = 81.3986, GNorm = 0.4601, lr_0 = 2.8047e-04
Loss = 9.3444e-02, PNorm = 81.4044, GNorm = 0.6635, lr_0 = 2.8028e-04
Loss = 8.9749e-02, PNorm = 81.4069, GNorm = 0.5343, lr_0 = 2.8009e-04
Loss = 9.9955e-02, PNorm = 81.4118, GNorm = 0.6713, lr_0 = 2.7989e-04
Loss = 1.0024e-01, PNorm = 81.4160, GNorm = 0.4951, lr_0 = 2.7970e-04
Loss = 9.4312e-02, PNorm = 81.4231, GNorm = 0.6788, lr_0 = 2.7951e-04
Loss = 1.1427e-01, PNorm = 81.4280, GNorm = 0.6999, lr_0 = 2.7932e-04
Loss = 9.5477e-02, PNorm = 81.4324, GNorm = 0.8580, lr_0 = 2.7913e-04
Loss = 9.4278e-02, PNorm = 81.4378, GNorm = 0.5817, lr_0 = 2.7894e-04
Loss = 8.8794e-02, PNorm = 81.4428, GNorm = 0.6887, lr_0 = 2.7875e-04
Loss = 1.0593e-01, PNorm = 81.4495, GNorm = 0.8020, lr_0 = 2.7855e-04
Loss = 1.0689e-01, PNorm = 81.4533, GNorm = 0.7924, lr_0 = 2.7836e-04
Loss = 1.1076e-01, PNorm = 81.4624, GNorm = 0.6640, lr_0 = 2.7817e-04
Loss = 1.0376e-01, PNorm = 81.4698, GNorm = 0.7976, lr_0 = 2.7798e-04
Loss = 9.1879e-02, PNorm = 81.4730, GNorm = 0.5001, lr_0 = 2.7779e-04
Loss = 1.0013e-01, PNorm = 81.4784, GNorm = 0.5557, lr_0 = 2.7760e-04
Loss = 9.1665e-02, PNorm = 81.4816, GNorm = 0.8281, lr_0 = 2.7741e-04
Loss = 9.7795e-02, PNorm = 81.4823, GNorm = 0.6563, lr_0 = 2.7722e-04
Loss = 9.0038e-02, PNorm = 81.4846, GNorm = 0.7040, lr_0 = 2.7703e-04
Loss = 1.0390e-01, PNorm = 81.4891, GNorm = 0.6241, lr_0 = 2.7684e-04
Loss = 9.5058e-02, PNorm = 81.4934, GNorm = 0.5873, lr_0 = 2.7665e-04
Loss = 1.1519e-01, PNorm = 81.4984, GNorm = 0.7454, lr_0 = 2.7646e-04
Loss = 1.0085e-01, PNorm = 81.5077, GNorm = 0.6432, lr_0 = 2.7627e-04
Loss = 1.0801e-01, PNorm = 81.5150, GNorm = 0.7032, lr_0 = 2.7608e-04
Loss = 8.7990e-02, PNorm = 81.5215, GNorm = 0.5569, lr_0 = 2.7590e-04
Loss = 1.0736e-01, PNorm = 81.5301, GNorm = 0.7788, lr_0 = 2.7571e-04
Loss = 1.0026e-01, PNorm = 81.5374, GNorm = 0.9166, lr_0 = 2.7552e-04
Loss = 9.8457e-02, PNorm = 81.5440, GNorm = 0.5237, lr_0 = 2.7533e-04
Loss = 9.7914e-02, PNorm = 81.5474, GNorm = 0.6177, lr_0 = 2.7514e-04
Loss = 1.0560e-01, PNorm = 81.5505, GNorm = 0.6781, lr_0 = 2.7495e-04
Loss = 1.1122e-01, PNorm = 81.5574, GNorm = 0.6649, lr_0 = 2.7476e-04
Loss = 1.0700e-01, PNorm = 81.5603, GNorm = 0.7785, lr_0 = 2.7457e-04
Loss = 9.3074e-02, PNorm = 81.5687, GNorm = 0.5608, lr_0 = 2.7439e-04
Loss = 1.0074e-01, PNorm = 81.5782, GNorm = 0.6195, lr_0 = 2.7420e-04
Loss = 9.7299e-02, PNorm = 81.5886, GNorm = 0.6779, lr_0 = 2.7401e-04
Loss = 1.0209e-01, PNorm = 81.5942, GNorm = 0.8582, lr_0 = 2.7382e-04
Loss = 1.0233e-01, PNorm = 81.5991, GNorm = 0.5405, lr_0 = 2.7364e-04
Loss = 9.8421e-02, PNorm = 81.6067, GNorm = 0.5569, lr_0 = 2.7345e-04
Loss = 1.1857e-01, PNorm = 81.6092, GNorm = 0.5718, lr_0 = 2.7326e-04
Loss = 1.0940e-01, PNorm = 81.6120, GNorm = 0.7820, lr_0 = 2.7307e-04
Loss = 1.0565e-01, PNorm = 81.6175, GNorm = 0.7071, lr_0 = 2.7289e-04
Loss = 1.0678e-01, PNorm = 81.6210, GNorm = 0.6491, lr_0 = 2.7270e-04
Loss = 9.6188e-02, PNorm = 81.6255, GNorm = 0.7764, lr_0 = 2.7251e-04
Loss = 1.1747e-01, PNorm = 81.6316, GNorm = 0.8427, lr_0 = 2.7233e-04
Loss = 9.7202e-02, PNorm = 81.6371, GNorm = 0.7319, lr_0 = 2.7214e-04
Loss = 1.0122e-01, PNorm = 81.6407, GNorm = 0.8656, lr_0 = 2.7195e-04
Loss = 9.9203e-02, PNorm = 81.6456, GNorm = 0.5286, lr_0 = 2.7177e-04
Loss = 9.4318e-02, PNorm = 81.6487, GNorm = 0.7051, lr_0 = 2.7158e-04
Loss = 9.7500e-02, PNorm = 81.6557, GNorm = 0.8333, lr_0 = 2.7139e-04
Loss = 1.0665e-01, PNorm = 81.6668, GNorm = 0.5480, lr_0 = 2.7121e-04
Loss = 1.0081e-01, PNorm = 81.6720, GNorm = 0.5776, lr_0 = 2.7102e-04
Loss = 9.5458e-02, PNorm = 81.6797, GNorm = 0.7245, lr_0 = 2.7084e-04
Loss = 1.1184e-01, PNorm = 81.6846, GNorm = 0.9508, lr_0 = 2.7065e-04
Loss = 1.0358e-01, PNorm = 81.6915, GNorm = 0.6256, lr_0 = 2.7047e-04
Loss = 1.0008e-01, PNorm = 81.6997, GNorm = 0.6517, lr_0 = 2.7028e-04
Loss = 1.0176e-01, PNorm = 81.7075, GNorm = 0.7306, lr_0 = 2.7010e-04
Loss = 8.9257e-02, PNorm = 81.7096, GNorm = 0.5380, lr_0 = 2.6991e-04
Loss = 9.4795e-02, PNorm = 81.7143, GNorm = 0.8441, lr_0 = 2.6973e-04
Loss = 9.9284e-02, PNorm = 81.7225, GNorm = 0.6442, lr_0 = 2.6954e-04
Loss = 9.7895e-02, PNorm = 81.7264, GNorm = 0.6056, lr_0 = 2.6936e-04
Loss = 9.0898e-02, PNorm = 81.7284, GNorm = 0.5933, lr_0 = 2.6917e-04
Loss = 1.0255e-01, PNorm = 81.7334, GNorm = 0.5398, lr_0 = 2.6899e-04
Loss = 1.0085e-01, PNorm = 81.7409, GNorm = 0.9332, lr_0 = 2.6880e-04
Loss = 9.6211e-02, PNorm = 81.7437, GNorm = 0.6762, lr_0 = 2.6862e-04
Loss = 8.7380e-02, PNorm = 81.7499, GNorm = 1.0002, lr_0 = 2.6844e-04
Loss = 1.0574e-01, PNorm = 81.7588, GNorm = 0.6422, lr_0 = 2.6825e-04
Validation mae = 0.227983
Epoch 18
Loss = 9.4178e-02, PNorm = 81.7686, GNorm = 0.7909, lr_0 = 2.6807e-04
Loss = 9.4040e-02, PNorm = 81.7762, GNorm = 0.6672, lr_0 = 2.6788e-04
Loss = 9.9794e-02, PNorm = 81.7822, GNorm = 0.6082, lr_0 = 2.6770e-04
Loss = 9.5521e-02, PNorm = 81.7867, GNorm = 0.5700, lr_0 = 2.6752e-04
Loss = 1.0481e-01, PNorm = 81.7927, GNorm = 0.8795, lr_0 = 2.6733e-04
Loss = 9.2086e-02, PNorm = 81.8013, GNorm = 0.7001, lr_0 = 2.6715e-04
Loss = 9.1012e-02, PNorm = 81.8083, GNorm = 0.5032, lr_0 = 2.6697e-04
Loss = 8.9804e-02, PNorm = 81.8136, GNorm = 0.5582, lr_0 = 2.6678e-04
Loss = 9.7228e-02, PNorm = 81.8212, GNorm = 0.9222, lr_0 = 2.6660e-04
Loss = 1.0258e-01, PNorm = 81.8269, GNorm = 0.8670, lr_0 = 2.6642e-04
Loss = 1.0595e-01, PNorm = 81.8353, GNorm = 0.6481, lr_0 = 2.6624e-04
Loss = 9.3622e-02, PNorm = 81.8403, GNorm = 0.5882, lr_0 = 2.6605e-04
Loss = 1.1067e-01, PNorm = 81.8467, GNorm = 0.9212, lr_0 = 2.6587e-04
Loss = 1.0539e-01, PNorm = 81.8509, GNorm = 0.7231, lr_0 = 2.6569e-04
Loss = 9.9297e-02, PNorm = 81.8591, GNorm = 0.9634, lr_0 = 2.6551e-04
Loss = 9.9948e-02, PNorm = 81.8681, GNorm = 0.5696, lr_0 = 2.6533e-04
Loss = 9.6658e-02, PNorm = 81.8697, GNorm = 0.6340, lr_0 = 2.6514e-04
Loss = 9.5910e-02, PNorm = 81.8737, GNorm = 0.6802, lr_0 = 2.6496e-04
Loss = 8.9681e-02, PNorm = 81.8789, GNorm = 0.6394, lr_0 = 2.6478e-04
Loss = 8.9178e-02, PNorm = 81.8829, GNorm = 0.4653, lr_0 = 2.6460e-04
Loss = 9.2421e-02, PNorm = 81.8865, GNorm = 0.7149, lr_0 = 2.6442e-04
Loss = 9.4042e-02, PNorm = 81.8910, GNorm = 0.8026, lr_0 = 2.6424e-04
Loss = 9.1735e-02, PNorm = 81.8941, GNorm = 0.5227, lr_0 = 2.6406e-04
Loss = 1.0738e-01, PNorm = 81.9002, GNorm = 0.8173, lr_0 = 2.6388e-04
Loss = 8.6876e-02, PNorm = 81.9070, GNorm = 0.5393, lr_0 = 2.6369e-04
Loss = 1.0505e-01, PNorm = 81.9113, GNorm = 0.5222, lr_0 = 2.6351e-04
Loss = 9.9018e-02, PNorm = 81.9197, GNorm = 0.7454, lr_0 = 2.6333e-04
Loss = 8.9861e-02, PNorm = 81.9231, GNorm = 0.7693, lr_0 = 2.6315e-04
Loss = 1.0400e-01, PNorm = 81.9280, GNorm = 0.5502, lr_0 = 2.6297e-04
Loss = 9.0602e-02, PNorm = 81.9333, GNorm = 0.5488, lr_0 = 2.6279e-04
Loss = 9.1253e-02, PNorm = 81.9389, GNorm = 0.7834, lr_0 = 2.6261e-04
Loss = 1.0770e-01, PNorm = 81.9452, GNorm = 0.6255, lr_0 = 2.6243e-04
Loss = 9.1673e-02, PNorm = 81.9507, GNorm = 0.5820, lr_0 = 2.6225e-04
Loss = 8.8277e-02, PNorm = 81.9543, GNorm = 0.6026, lr_0 = 2.6207e-04
Loss = 1.0279e-01, PNorm = 81.9568, GNorm = 0.6019, lr_0 = 2.6189e-04
Loss = 9.3580e-02, PNorm = 81.9616, GNorm = 0.5970, lr_0 = 2.6171e-04
Loss = 8.5182e-02, PNorm = 81.9662, GNorm = 0.7497, lr_0 = 2.6153e-04
Loss = 9.0147e-02, PNorm = 81.9698, GNorm = 0.6978, lr_0 = 2.6136e-04
Loss = 9.8329e-02, PNorm = 81.9789, GNorm = 0.7040, lr_0 = 2.6118e-04
Loss = 9.4655e-02, PNorm = 81.9856, GNorm = 0.8228, lr_0 = 2.6100e-04
Loss = 1.1233e-01, PNorm = 81.9922, GNorm = 0.8928, lr_0 = 2.6082e-04
Loss = 9.6452e-02, PNorm = 82.0000, GNorm = 0.8509, lr_0 = 2.6064e-04
Loss = 9.4196e-02, PNorm = 82.0062, GNorm = 1.0478, lr_0 = 2.6046e-04
Loss = 9.0380e-02, PNorm = 82.0109, GNorm = 0.8861, lr_0 = 2.6028e-04
Loss = 9.3490e-02, PNorm = 82.0162, GNorm = 0.6083, lr_0 = 2.6011e-04
Loss = 9.4211e-02, PNorm = 82.0209, GNorm = 0.5843, lr_0 = 2.5993e-04
Loss = 8.8800e-02, PNorm = 82.0214, GNorm = 0.5447, lr_0 = 2.5975e-04
Loss = 1.0150e-01, PNorm = 82.0265, GNorm = 0.6511, lr_0 = 2.5957e-04
Loss = 9.3602e-02, PNorm = 82.0322, GNorm = 0.6988, lr_0 = 2.5939e-04
Loss = 1.0807e-01, PNorm = 82.0389, GNorm = 0.7356, lr_0 = 2.5922e-04
Loss = 9.8742e-02, PNorm = 82.0444, GNorm = 0.7874, lr_0 = 2.5904e-04
Loss = 1.0378e-01, PNorm = 82.0488, GNorm = 0.7037, lr_0 = 2.5886e-04
Loss = 9.5711e-02, PNorm = 82.0541, GNorm = 0.6575, lr_0 = 2.5868e-04
Loss = 9.5917e-02, PNorm = 82.0593, GNorm = 0.7082, lr_0 = 2.5851e-04
Loss = 1.0857e-01, PNorm = 82.0652, GNorm = 0.6866, lr_0 = 2.5833e-04
Loss = 8.9253e-02, PNorm = 82.0711, GNorm = 0.6820, lr_0 = 2.5815e-04
Loss = 9.5778e-02, PNorm = 82.0751, GNorm = 0.5450, lr_0 = 2.5797e-04
Loss = 8.8133e-02, PNorm = 82.0806, GNorm = 0.5917, lr_0 = 2.5780e-04
Loss = 8.8224e-02, PNorm = 82.0843, GNorm = 1.0145, lr_0 = 2.5762e-04
Loss = 9.8850e-02, PNorm = 82.0869, GNorm = 0.8194, lr_0 = 2.5745e-04
Loss = 9.8558e-02, PNorm = 82.0921, GNorm = 0.5890, lr_0 = 2.5727e-04
Loss = 9.8964e-02, PNorm = 82.0963, GNorm = 0.5379, lr_0 = 2.5709e-04
Loss = 9.9859e-02, PNorm = 82.1035, GNorm = 0.6741, lr_0 = 2.5692e-04
Loss = 9.3283e-02, PNorm = 82.1102, GNorm = 0.7470, lr_0 = 2.5674e-04
Loss = 8.7319e-02, PNorm = 82.1182, GNorm = 0.6658, lr_0 = 2.5656e-04
Loss = 9.3199e-02, PNorm = 82.1224, GNorm = 0.6423, lr_0 = 2.5639e-04
Loss = 9.6042e-02, PNorm = 82.1271, GNorm = 0.7660, lr_0 = 2.5621e-04
Loss = 1.1107e-01, PNorm = 82.1302, GNorm = 0.6792, lr_0 = 2.5604e-04
Loss = 1.0120e-01, PNorm = 82.1354, GNorm = 0.5016, lr_0 = 2.5586e-04
Loss = 1.0666e-01, PNorm = 82.1419, GNorm = 0.6190, lr_0 = 2.5569e-04
Loss = 9.8215e-02, PNorm = 82.1476, GNorm = 0.5735, lr_0 = 2.5551e-04
Loss = 9.6071e-02, PNorm = 82.1553, GNorm = 0.5409, lr_0 = 2.5534e-04
Loss = 9.5599e-02, PNorm = 82.1632, GNorm = 0.8722, lr_0 = 2.5516e-04
Loss = 9.5232e-02, PNorm = 82.1697, GNorm = 0.4933, lr_0 = 2.5499e-04
Loss = 9.9590e-02, PNorm = 82.1726, GNorm = 0.7452, lr_0 = 2.5481e-04
Loss = 9.9486e-02, PNorm = 82.1717, GNorm = 0.7158, lr_0 = 2.5464e-04
Loss = 1.0392e-01, PNorm = 82.1762, GNorm = 0.4982, lr_0 = 2.5446e-04
Loss = 1.0237e-01, PNorm = 82.1804, GNorm = 0.5801, lr_0 = 2.5429e-04
Loss = 9.4392e-02, PNorm = 82.1832, GNorm = 0.5541, lr_0 = 2.5411e-04
Loss = 9.5657e-02, PNorm = 82.1890, GNorm = 0.6052, lr_0 = 2.5394e-04
Loss = 8.8411e-02, PNorm = 82.1930, GNorm = 0.6906, lr_0 = 2.5377e-04
Loss = 9.0450e-02, PNorm = 82.1984, GNorm = 1.0282, lr_0 = 2.5359e-04
Loss = 1.0597e-01, PNorm = 82.2033, GNorm = 0.4928, lr_0 = 2.5342e-04
Loss = 8.4997e-02, PNorm = 82.2094, GNorm = 0.8529, lr_0 = 2.5325e-04
Loss = 9.2930e-02, PNorm = 82.2149, GNorm = 0.6126, lr_0 = 2.5307e-04
Loss = 8.8207e-02, PNorm = 82.2180, GNorm = 0.5840, lr_0 = 2.5290e-04
Loss = 1.0052e-01, PNorm = 82.2238, GNorm = 0.6301, lr_0 = 2.5273e-04
Loss = 8.5283e-02, PNorm = 82.2332, GNorm = 1.0712, lr_0 = 2.5255e-04
Loss = 1.0434e-01, PNorm = 82.2378, GNorm = 1.0586, lr_0 = 2.5238e-04
Loss = 1.0643e-01, PNorm = 82.2430, GNorm = 0.5472, lr_0 = 2.5221e-04
Loss = 9.1894e-02, PNorm = 82.2506, GNorm = 0.5797, lr_0 = 2.5203e-04
Loss = 9.7587e-02, PNorm = 82.2574, GNorm = 0.6910, lr_0 = 2.5186e-04
Loss = 9.9136e-02, PNorm = 82.2603, GNorm = 0.5038, lr_0 = 2.5169e-04
Loss = 1.0264e-01, PNorm = 82.2652, GNorm = 0.6760, lr_0 = 2.5152e-04
Loss = 1.0221e-01, PNorm = 82.2713, GNorm = 0.5690, lr_0 = 2.5134e-04
Loss = 1.0377e-01, PNorm = 82.2739, GNorm = 0.5113, lr_0 = 2.5117e-04
Loss = 9.7300e-02, PNorm = 82.2757, GNorm = 0.9106, lr_0 = 2.5100e-04
Loss = 9.0255e-02, PNorm = 82.2794, GNorm = 0.6377, lr_0 = 2.5083e-04
Loss = 1.0054e-01, PNorm = 82.2850, GNorm = 0.4849, lr_0 = 2.5066e-04
Loss = 1.0296e-01, PNorm = 82.2910, GNorm = 0.8539, lr_0 = 2.5048e-04
Loss = 9.2487e-02, PNorm = 82.2930, GNorm = 0.7979, lr_0 = 2.5031e-04
Loss = 9.4200e-02, PNorm = 82.2981, GNorm = 0.4398, lr_0 = 2.5014e-04
Loss = 9.3797e-02, PNorm = 82.3040, GNorm = 0.6820, lr_0 = 2.4997e-04
Loss = 9.0328e-02, PNorm = 82.3088, GNorm = 0.4910, lr_0 = 2.4980e-04
Loss = 9.7065e-02, PNorm = 82.3155, GNorm = 0.5953, lr_0 = 2.4963e-04
Loss = 1.2425e-01, PNorm = 82.3221, GNorm = 0.8413, lr_0 = 2.4946e-04
Loss = 1.0258e-01, PNorm = 82.3284, GNorm = 0.9598, lr_0 = 2.4929e-04
Loss = 9.1429e-02, PNorm = 82.3354, GNorm = 0.7722, lr_0 = 2.4911e-04
Loss = 1.0894e-01, PNorm = 82.3401, GNorm = 0.5431, lr_0 = 2.4894e-04
Loss = 8.8977e-02, PNorm = 82.3451, GNorm = 0.6621, lr_0 = 2.4877e-04
Loss = 9.4494e-02, PNorm = 82.3483, GNorm = 0.5453, lr_0 = 2.4860e-04
Loss = 8.0631e-02, PNorm = 82.3505, GNorm = 0.7262, lr_0 = 2.4843e-04
Loss = 9.9034e-02, PNorm = 82.3536, GNorm = 0.6214, lr_0 = 2.4826e-04
Loss = 9.9870e-02, PNorm = 82.3559, GNorm = 0.7296, lr_0 = 2.4809e-04
Loss = 8.9438e-02, PNorm = 82.3602, GNorm = 0.5140, lr_0 = 2.4792e-04
Loss = 1.0119e-01, PNorm = 82.3619, GNorm = 0.4726, lr_0 = 2.4775e-04
Loss = 9.4405e-02, PNorm = 82.3689, GNorm = 0.5836, lr_0 = 2.4758e-04
Loss = 1.0121e-01, PNorm = 82.3746, GNorm = 0.7679, lr_0 = 2.4741e-04
Loss = 8.7334e-02, PNorm = 82.3821, GNorm = 0.7804, lr_0 = 2.4724e-04
Loss = 1.0310e-01, PNorm = 82.3866, GNorm = 0.7266, lr_0 = 2.4707e-04
Validation mae = 0.227421
Epoch 19
Loss = 9.2581e-02, PNorm = 82.3885, GNorm = 0.8668, lr_0 = 2.4690e-04
Loss = 8.7374e-02, PNorm = 82.3916, GNorm = 0.5216, lr_0 = 2.4674e-04
Loss = 8.7211e-02, PNorm = 82.3957, GNorm = 0.6934, lr_0 = 2.4657e-04
Loss = 9.9356e-02, PNorm = 82.4032, GNorm = 0.6083, lr_0 = 2.4640e-04
Loss = 9.6271e-02, PNorm = 82.4115, GNorm = 0.7427, lr_0 = 2.4623e-04
Loss = 9.5536e-02, PNorm = 82.4204, GNorm = 0.5720, lr_0 = 2.4606e-04
Loss = 8.4453e-02, PNorm = 82.4261, GNorm = 1.0491, lr_0 = 2.4589e-04
Loss = 8.8061e-02, PNorm = 82.4319, GNorm = 0.6423, lr_0 = 2.4572e-04
Loss = 9.4113e-02, PNorm = 82.4372, GNorm = 0.9637, lr_0 = 2.4556e-04
Loss = 8.9707e-02, PNorm = 82.4410, GNorm = 0.6553, lr_0 = 2.4539e-04
Loss = 9.5359e-02, PNorm = 82.4469, GNorm = 0.6423, lr_0 = 2.4522e-04
Loss = 9.1585e-02, PNorm = 82.4516, GNorm = 1.0669, lr_0 = 2.4505e-04
Loss = 8.8414e-02, PNorm = 82.4536, GNorm = 0.4581, lr_0 = 2.4488e-04
Loss = 9.2589e-02, PNorm = 82.4557, GNorm = 0.6882, lr_0 = 2.4472e-04
Loss = 9.0460e-02, PNorm = 82.4564, GNorm = 0.6411, lr_0 = 2.4455e-04
Loss = 8.5372e-02, PNorm = 82.4587, GNorm = 0.6488, lr_0 = 2.4438e-04
Loss = 9.1455e-02, PNorm = 82.4642, GNorm = 0.4891, lr_0 = 2.4421e-04
Loss = 8.5030e-02, PNorm = 82.4694, GNorm = 0.7526, lr_0 = 2.4405e-04
Loss = 9.2509e-02, PNorm = 82.4725, GNorm = 0.7394, lr_0 = 2.4388e-04
Loss = 9.6439e-02, PNorm = 82.4814, GNorm = 0.6949, lr_0 = 2.4371e-04
Loss = 9.8718e-02, PNorm = 82.4866, GNorm = 0.5705, lr_0 = 2.4354e-04
Loss = 9.5581e-02, PNorm = 82.4913, GNorm = 0.6271, lr_0 = 2.4338e-04
Loss = 8.4548e-02, PNorm = 82.4992, GNorm = 0.8183, lr_0 = 2.4321e-04
Loss = 9.2510e-02, PNorm = 82.5027, GNorm = 0.5683, lr_0 = 2.4304e-04
Loss = 8.6528e-02, PNorm = 82.5058, GNorm = 0.7511, lr_0 = 2.4288e-04
Loss = 8.7348e-02, PNorm = 82.5127, GNorm = 0.4934, lr_0 = 2.4271e-04
Loss = 9.1135e-02, PNorm = 82.5132, GNorm = 0.6724, lr_0 = 2.4254e-04
Loss = 9.1427e-02, PNorm = 82.5141, GNorm = 0.5519, lr_0 = 2.4238e-04
Loss = 9.7520e-02, PNorm = 82.5201, GNorm = 0.7520, lr_0 = 2.4221e-04
Loss = 9.3053e-02, PNorm = 82.5231, GNorm = 0.7003, lr_0 = 2.4205e-04
Loss = 1.0419e-01, PNorm = 82.5269, GNorm = 0.5842, lr_0 = 2.4188e-04
Loss = 9.7697e-02, PNorm = 82.5333, GNorm = 0.5589, lr_0 = 2.4171e-04
Loss = 9.6194e-02, PNorm = 82.5363, GNorm = 0.5350, lr_0 = 2.4155e-04
Loss = 9.8554e-02, PNorm = 82.5424, GNorm = 0.5272, lr_0 = 2.4138e-04
Loss = 1.0827e-01, PNorm = 82.5487, GNorm = 0.8793, lr_0 = 2.4122e-04
Loss = 9.2443e-02, PNorm = 82.5525, GNorm = 1.1346, lr_0 = 2.4105e-04
Loss = 9.4856e-02, PNorm = 82.5580, GNorm = 0.6499, lr_0 = 2.4089e-04
Loss = 9.2348e-02, PNorm = 82.5627, GNorm = 0.5030, lr_0 = 2.4072e-04
Loss = 1.1069e-01, PNorm = 82.5684, GNorm = 0.5383, lr_0 = 2.4056e-04
Loss = 7.9823e-02, PNorm = 82.5711, GNorm = 0.6659, lr_0 = 2.4039e-04
Loss = 9.7898e-02, PNorm = 82.5712, GNorm = 0.6909, lr_0 = 2.4023e-04
Loss = 9.6191e-02, PNorm = 82.5769, GNorm = 0.5842, lr_0 = 2.4006e-04
Loss = 9.3130e-02, PNorm = 82.5839, GNorm = 0.7856, lr_0 = 2.3990e-04
Loss = 9.4491e-02, PNorm = 82.5881, GNorm = 0.6441, lr_0 = 2.3974e-04
Loss = 9.5390e-02, PNorm = 82.5940, GNorm = 0.7611, lr_0 = 2.3957e-04
Loss = 8.2823e-02, PNorm = 82.5986, GNorm = 0.7041, lr_0 = 2.3941e-04
Loss = 9.6742e-02, PNorm = 82.5988, GNorm = 0.7553, lr_0 = 2.3924e-04
Loss = 9.8531e-02, PNorm = 82.6030, GNorm = 0.8892, lr_0 = 2.3908e-04
Loss = 1.0378e-01, PNorm = 82.6076, GNorm = 0.4624, lr_0 = 2.3892e-04
Loss = 8.9808e-02, PNorm = 82.6123, GNorm = 0.6996, lr_0 = 2.3875e-04
Loss = 9.1661e-02, PNorm = 82.6209, GNorm = 0.5745, lr_0 = 2.3859e-04
Loss = 7.7488e-02, PNorm = 82.6248, GNorm = 0.5916, lr_0 = 2.3842e-04
Loss = 1.0360e-01, PNorm = 82.6272, GNorm = 0.5411, lr_0 = 2.3826e-04
Loss = 1.0510e-01, PNorm = 82.6328, GNorm = 0.6481, lr_0 = 2.3810e-04
Loss = 8.3936e-02, PNorm = 82.6377, GNorm = 0.5448, lr_0 = 2.3794e-04
Loss = 8.9890e-02, PNorm = 82.6456, GNorm = 0.7447, lr_0 = 2.3777e-04
Loss = 9.4023e-02, PNorm = 82.6514, GNorm = 0.6204, lr_0 = 2.3761e-04
Loss = 9.5741e-02, PNorm = 82.6580, GNorm = 1.0080, lr_0 = 2.3745e-04
Loss = 9.5690e-02, PNorm = 82.6620, GNorm = 0.8773, lr_0 = 2.3728e-04
Loss = 1.0034e-01, PNorm = 82.6647, GNorm = 0.5821, lr_0 = 2.3712e-04
Loss = 9.5661e-02, PNorm = 82.6655, GNorm = 0.6251, lr_0 = 2.3696e-04
Loss = 9.8959e-02, PNorm = 82.6711, GNorm = 0.9995, lr_0 = 2.3680e-04
Loss = 1.1176e-01, PNorm = 82.6775, GNorm = 0.8441, lr_0 = 2.3663e-04
Loss = 9.6850e-02, PNorm = 82.6818, GNorm = 0.7199, lr_0 = 2.3647e-04
Loss = 9.5708e-02, PNorm = 82.6854, GNorm = 0.6519, lr_0 = 2.3631e-04
Loss = 9.4726e-02, PNorm = 82.6913, GNorm = 0.4837, lr_0 = 2.3615e-04
Loss = 1.0309e-01, PNorm = 82.6941, GNorm = 0.5617, lr_0 = 2.3599e-04
Loss = 1.0336e-01, PNorm = 82.6986, GNorm = 0.8703, lr_0 = 2.3582e-04
Loss = 1.0003e-01, PNorm = 82.7031, GNorm = 0.9516, lr_0 = 2.3566e-04
Loss = 9.3576e-02, PNorm = 82.7045, GNorm = 0.7501, lr_0 = 2.3550e-04
Loss = 9.1726e-02, PNorm = 82.7088, GNorm = 0.4581, lr_0 = 2.3534e-04
Loss = 9.3835e-02, PNorm = 82.7137, GNorm = 0.6383, lr_0 = 2.3518e-04
Loss = 9.6683e-02, PNorm = 82.7179, GNorm = 0.6153, lr_0 = 2.3502e-04
Loss = 1.0176e-01, PNorm = 82.7243, GNorm = 0.7349, lr_0 = 2.3486e-04
Loss = 9.2002e-02, PNorm = 82.7285, GNorm = 0.5085, lr_0 = 2.3470e-04
Loss = 8.8029e-02, PNorm = 82.7329, GNorm = 0.6798, lr_0 = 2.3454e-04
Loss = 9.7342e-02, PNorm = 82.7383, GNorm = 0.5520, lr_0 = 2.3437e-04
Loss = 9.0062e-02, PNorm = 82.7441, GNorm = 0.6152, lr_0 = 2.3421e-04
Loss = 1.0477e-01, PNorm = 82.7483, GNorm = 0.6956, lr_0 = 2.3405e-04
Loss = 1.0469e-01, PNorm = 82.7537, GNorm = 1.5371, lr_0 = 2.3389e-04
Loss = 7.6679e-02, PNorm = 82.7588, GNorm = 0.5499, lr_0 = 2.3373e-04
Loss = 8.6232e-02, PNorm = 82.7615, GNorm = 0.5989, lr_0 = 2.3357e-04
Loss = 1.0099e-01, PNorm = 82.7661, GNorm = 0.6211, lr_0 = 2.3341e-04
Loss = 9.5834e-02, PNorm = 82.7700, GNorm = 0.5318, lr_0 = 2.3325e-04
Loss = 8.6889e-02, PNorm = 82.7720, GNorm = 0.7094, lr_0 = 2.3309e-04
Loss = 8.7860e-02, PNorm = 82.7813, GNorm = 0.9723, lr_0 = 2.3293e-04
Loss = 8.6536e-02, PNorm = 82.7842, GNorm = 0.6121, lr_0 = 2.3277e-04
Loss = 9.3970e-02, PNorm = 82.7888, GNorm = 0.6527, lr_0 = 2.3261e-04
Loss = 1.0646e-01, PNorm = 82.7940, GNorm = 0.5629, lr_0 = 2.3246e-04
Loss = 8.5160e-02, PNorm = 82.7972, GNorm = 0.6593, lr_0 = 2.3230e-04
Loss = 9.6246e-02, PNorm = 82.8002, GNorm = 0.6949, lr_0 = 2.3214e-04
Loss = 8.9451e-02, PNorm = 82.8038, GNorm = 0.7784, lr_0 = 2.3198e-04
Loss = 9.8948e-02, PNorm = 82.8080, GNorm = 0.7992, lr_0 = 2.3182e-04
Loss = 9.0096e-02, PNorm = 82.8134, GNorm = 0.6720, lr_0 = 2.3166e-04
Loss = 1.0654e-01, PNorm = 82.8166, GNorm = 0.6784, lr_0 = 2.3150e-04
Loss = 1.0901e-01, PNorm = 82.8231, GNorm = 0.5964, lr_0 = 2.3134e-04
Loss = 9.7273e-02, PNorm = 82.8305, GNorm = 0.5438, lr_0 = 2.3118e-04
Loss = 8.8723e-02, PNorm = 82.8355, GNorm = 0.6189, lr_0 = 2.3103e-04
Loss = 9.1528e-02, PNorm = 82.8396, GNorm = 0.6237, lr_0 = 2.3087e-04
Loss = 9.4783e-02, PNorm = 82.8439, GNorm = 0.7570, lr_0 = 2.3071e-04
Loss = 9.7514e-02, PNorm = 82.8503, GNorm = 0.8088, lr_0 = 2.3055e-04
Loss = 1.1636e-01, PNorm = 82.8545, GNorm = 0.6301, lr_0 = 2.3039e-04
Loss = 8.6664e-02, PNorm = 82.8594, GNorm = 0.7735, lr_0 = 2.3024e-04
Loss = 9.0026e-02, PNorm = 82.8628, GNorm = 0.7624, lr_0 = 2.3008e-04
Loss = 9.0380e-02, PNorm = 82.8668, GNorm = 0.6133, lr_0 = 2.2992e-04
Loss = 8.9760e-02, PNorm = 82.8710, GNorm = 0.6158, lr_0 = 2.2976e-04
Loss = 9.6745e-02, PNorm = 82.8762, GNorm = 0.4881, lr_0 = 2.2961e-04
Loss = 8.9179e-02, PNorm = 82.8784, GNorm = 0.9053, lr_0 = 2.2945e-04
Loss = 9.4465e-02, PNorm = 82.8815, GNorm = 0.6802, lr_0 = 2.2929e-04
Loss = 7.1600e-02, PNorm = 82.8869, GNorm = 0.6202, lr_0 = 2.2913e-04
Loss = 9.3886e-02, PNorm = 82.8918, GNorm = 0.7818, lr_0 = 2.2898e-04
Loss = 9.2943e-02, PNorm = 82.8952, GNorm = 0.6175, lr_0 = 2.2882e-04
Loss = 9.7698e-02, PNorm = 82.8984, GNorm = 0.9571, lr_0 = 2.2866e-04
Loss = 9.0818e-02, PNorm = 82.9015, GNorm = 0.6697, lr_0 = 2.2851e-04
Loss = 9.4997e-02, PNorm = 82.9042, GNorm = 0.6030, lr_0 = 2.2835e-04
Loss = 8.1237e-02, PNorm = 82.9103, GNorm = 0.6627, lr_0 = 2.2819e-04
Loss = 9.6832e-02, PNorm = 82.9166, GNorm = 0.7032, lr_0 = 2.2804e-04
Loss = 1.0273e-01, PNorm = 82.9205, GNorm = 0.6104, lr_0 = 2.2788e-04
Loss = 9.8688e-02, PNorm = 82.9247, GNorm = 0.5961, lr_0 = 2.2773e-04
Loss = 1.0068e-01, PNorm = 82.9282, GNorm = 0.5518, lr_0 = 2.2757e-04
Validation mae = 0.227580
Epoch 20
Loss = 9.9904e-02, PNorm = 82.9323, GNorm = 0.7098, lr_0 = 2.2741e-04
Loss = 8.4024e-02, PNorm = 82.9379, GNorm = 0.6075, lr_0 = 2.2726e-04
Loss = 8.5409e-02, PNorm = 82.9404, GNorm = 0.6114, lr_0 = 2.2710e-04
Loss = 8.5812e-02, PNorm = 82.9443, GNorm = 0.6956, lr_0 = 2.2695e-04
Loss = 7.8267e-02, PNorm = 82.9485, GNorm = 0.4673, lr_0 = 2.2679e-04
Loss = 9.4270e-02, PNorm = 82.9526, GNorm = 0.5170, lr_0 = 2.2664e-04
Loss = 8.6606e-02, PNorm = 82.9556, GNorm = 0.7043, lr_0 = 2.2648e-04
Loss = 8.6502e-02, PNorm = 82.9594, GNorm = 0.4833, lr_0 = 2.2632e-04
Loss = 8.4456e-02, PNorm = 82.9647, GNorm = 0.6564, lr_0 = 2.2617e-04
Loss = 9.3808e-02, PNorm = 82.9679, GNorm = 0.6503, lr_0 = 2.2601e-04
Loss = 8.8962e-02, PNorm = 82.9738, GNorm = 0.7053, lr_0 = 2.2586e-04
Loss = 8.6229e-02, PNorm = 82.9791, GNorm = 0.6865, lr_0 = 2.2571e-04
Loss = 8.3791e-02, PNorm = 82.9811, GNorm = 0.6406, lr_0 = 2.2555e-04
Loss = 8.9904e-02, PNorm = 82.9865, GNorm = 0.5722, lr_0 = 2.2540e-04
Loss = 8.0007e-02, PNorm = 82.9878, GNorm = 0.6784, lr_0 = 2.2524e-04
Loss = 9.1851e-02, PNorm = 82.9925, GNorm = 0.7075, lr_0 = 2.2509e-04
Loss = 9.2341e-02, PNorm = 82.9958, GNorm = 0.5746, lr_0 = 2.2493e-04
Loss = 9.4793e-02, PNorm = 82.9998, GNorm = 0.6976, lr_0 = 2.2478e-04
Loss = 7.3490e-02, PNorm = 83.0044, GNorm = 0.4667, lr_0 = 2.2463e-04
Loss = 7.7604e-02, PNorm = 83.0064, GNorm = 0.5396, lr_0 = 2.2447e-04
Loss = 8.2767e-02, PNorm = 83.0098, GNorm = 0.7821, lr_0 = 2.2432e-04
Loss = 1.0261e-01, PNorm = 83.0167, GNorm = 0.8467, lr_0 = 2.2416e-04
Loss = 7.5362e-02, PNorm = 83.0184, GNorm = 0.5586, lr_0 = 2.2401e-04
Loss = 8.7735e-02, PNorm = 83.0212, GNorm = 0.6155, lr_0 = 2.2386e-04
Loss = 8.6278e-02, PNorm = 83.0241, GNorm = 0.7442, lr_0 = 2.2370e-04
Loss = 9.6206e-02, PNorm = 83.0290, GNorm = 0.6121, lr_0 = 2.2355e-04
Loss = 9.0962e-02, PNorm = 83.0331, GNorm = 0.5725, lr_0 = 2.2340e-04
Loss = 9.0250e-02, PNorm = 83.0362, GNorm = 0.5661, lr_0 = 2.2324e-04
Loss = 8.2895e-02, PNorm = 83.0411, GNorm = 0.5932, lr_0 = 2.2309e-04
Loss = 7.4481e-02, PNorm = 83.0458, GNorm = 0.6171, lr_0 = 2.2294e-04
Loss = 1.0662e-01, PNorm = 83.0478, GNorm = 0.5332, lr_0 = 2.2279e-04
Loss = 8.8405e-02, PNorm = 83.0533, GNorm = 0.6079, lr_0 = 2.2263e-04
Loss = 8.8864e-02, PNorm = 83.0561, GNorm = 0.4795, lr_0 = 2.2248e-04
Loss = 9.6311e-02, PNorm = 83.0605, GNorm = 0.9270, lr_0 = 2.2233e-04
Loss = 9.7651e-02, PNorm = 83.0654, GNorm = 0.7599, lr_0 = 2.2218e-04
Loss = 8.9685e-02, PNorm = 83.0692, GNorm = 0.5188, lr_0 = 2.2202e-04
Loss = 9.1726e-02, PNorm = 83.0753, GNorm = 0.5953, lr_0 = 2.2187e-04
Loss = 7.5604e-02, PNorm = 83.0796, GNorm = 0.5122, lr_0 = 2.2172e-04
Loss = 8.4739e-02, PNorm = 83.0843, GNorm = 0.6389, lr_0 = 2.2157e-04
Loss = 8.9425e-02, PNorm = 83.0894, GNorm = 0.7266, lr_0 = 2.2142e-04
Loss = 8.6315e-02, PNorm = 83.0919, GNorm = 0.6014, lr_0 = 2.2126e-04
Loss = 8.7790e-02, PNorm = 83.0941, GNorm = 0.8229, lr_0 = 2.2111e-04
Loss = 8.8590e-02, PNorm = 83.0965, GNorm = 0.5696, lr_0 = 2.2096e-04
Loss = 9.8719e-02, PNorm = 83.1020, GNorm = 0.6475, lr_0 = 2.2081e-04
Loss = 8.5461e-02, PNorm = 83.1062, GNorm = 0.5082, lr_0 = 2.2066e-04
Loss = 9.3792e-02, PNorm = 83.1111, GNorm = 0.6762, lr_0 = 2.2051e-04
Loss = 8.9241e-02, PNorm = 83.1144, GNorm = 0.7502, lr_0 = 2.2036e-04
Loss = 9.2985e-02, PNorm = 83.1171, GNorm = 0.5718, lr_0 = 2.2021e-04
Loss = 8.4764e-02, PNorm = 83.1222, GNorm = 0.4852, lr_0 = 2.2005e-04
Loss = 8.7799e-02, PNorm = 83.1268, GNorm = 0.5143, lr_0 = 2.1990e-04
Loss = 9.2773e-02, PNorm = 83.1303, GNorm = 0.6314, lr_0 = 2.1975e-04
Loss = 8.3899e-02, PNorm = 83.1352, GNorm = 0.6854, lr_0 = 2.1960e-04
Loss = 8.4549e-02, PNorm = 83.1403, GNorm = 0.6461, lr_0 = 2.1945e-04
Loss = 9.3621e-02, PNorm = 83.1450, GNorm = 0.5151, lr_0 = 2.1930e-04
Loss = 1.0547e-01, PNorm = 83.1472, GNorm = 0.6715, lr_0 = 2.1915e-04
Loss = 1.0402e-01, PNorm = 83.1586, GNorm = 0.7558, lr_0 = 2.1900e-04
Loss = 9.8487e-02, PNorm = 83.1633, GNorm = 0.6761, lr_0 = 2.1885e-04
Loss = 9.9187e-02, PNorm = 83.1691, GNorm = 0.5605, lr_0 = 2.1870e-04
Loss = 9.6048e-02, PNorm = 83.1775, GNorm = 0.5862, lr_0 = 2.1855e-04
Loss = 1.0051e-01, PNorm = 83.1821, GNorm = 0.7874, lr_0 = 2.1840e-04
Loss = 8.6507e-02, PNorm = 83.1830, GNorm = 0.6943, lr_0 = 2.1825e-04
Loss = 8.9406e-02, PNorm = 83.1855, GNorm = 0.5959, lr_0 = 2.1810e-04
Loss = 9.3296e-02, PNorm = 83.1891, GNorm = 0.5361, lr_0 = 2.1795e-04
Loss = 8.6493e-02, PNorm = 83.1935, GNorm = 0.5614, lr_0 = 2.1780e-04
Loss = 9.3820e-02, PNorm = 83.1983, GNorm = 0.7383, lr_0 = 2.1765e-04
Loss = 9.8178e-02, PNorm = 83.2023, GNorm = 0.6543, lr_0 = 2.1751e-04
Loss = 8.9686e-02, PNorm = 83.2061, GNorm = 0.5798, lr_0 = 2.1736e-04
Loss = 9.4189e-02, PNorm = 83.2101, GNorm = 0.7430, lr_0 = 2.1721e-04
Loss = 9.1311e-02, PNorm = 83.2165, GNorm = 1.1021, lr_0 = 2.1706e-04
Loss = 9.1184e-02, PNorm = 83.2232, GNorm = 0.7225, lr_0 = 2.1691e-04
Loss = 9.1402e-02, PNorm = 83.2270, GNorm = 0.5588, lr_0 = 2.1676e-04
Loss = 8.6251e-02, PNorm = 83.2279, GNorm = 0.5609, lr_0 = 2.1661e-04
Loss = 8.5274e-02, PNorm = 83.2329, GNorm = 0.6046, lr_0 = 2.1646e-04
Loss = 9.2245e-02, PNorm = 83.2347, GNorm = 0.7155, lr_0 = 2.1632e-04
Loss = 9.4591e-02, PNorm = 83.2379, GNorm = 0.8056, lr_0 = 2.1617e-04
Loss = 9.6002e-02, PNorm = 83.2421, GNorm = 0.6948, lr_0 = 2.1602e-04
Loss = 8.8055e-02, PNorm = 83.2479, GNorm = 0.8520, lr_0 = 2.1587e-04
Loss = 9.6793e-02, PNorm = 83.2530, GNorm = 0.6662, lr_0 = 2.1572e-04
Loss = 8.6071e-02, PNorm = 83.2574, GNorm = 0.6009, lr_0 = 2.1558e-04
Loss = 9.0419e-02, PNorm = 83.2603, GNorm = 0.6137, lr_0 = 2.1543e-04
Loss = 9.1275e-02, PNorm = 83.2606, GNorm = 0.4768, lr_0 = 2.1528e-04
Loss = 9.0062e-02, PNorm = 83.2632, GNorm = 0.7149, lr_0 = 2.1513e-04
Loss = 9.5858e-02, PNorm = 83.2679, GNorm = 0.7266, lr_0 = 2.1499e-04
Loss = 8.0866e-02, PNorm = 83.2732, GNorm = 0.7192, lr_0 = 2.1484e-04
Loss = 1.0260e-01, PNorm = 83.2791, GNorm = 0.5373, lr_0 = 2.1469e-04
Loss = 9.0326e-02, PNorm = 83.2848, GNorm = 0.5512, lr_0 = 2.1454e-04
Loss = 9.2018e-02, PNorm = 83.2905, GNorm = 0.7982, lr_0 = 2.1440e-04
Loss = 9.8988e-02, PNorm = 83.2947, GNorm = 0.5613, lr_0 = 2.1425e-04
Loss = 9.4538e-02, PNorm = 83.2993, GNorm = 0.6805, lr_0 = 2.1410e-04
Loss = 9.7497e-02, PNorm = 83.3034, GNorm = 0.7091, lr_0 = 2.1396e-04
Loss = 9.1598e-02, PNorm = 83.3056, GNorm = 0.5618, lr_0 = 2.1381e-04
Loss = 9.9089e-02, PNorm = 83.3068, GNorm = 0.9086, lr_0 = 2.1366e-04
Loss = 9.3434e-02, PNorm = 83.3113, GNorm = 0.7022, lr_0 = 2.1352e-04
Loss = 8.3224e-02, PNorm = 83.3138, GNorm = 0.7260, lr_0 = 2.1337e-04
Loss = 8.1961e-02, PNorm = 83.3188, GNorm = 0.7122, lr_0 = 2.1323e-04
Loss = 8.8525e-02, PNorm = 83.3211, GNorm = 0.5901, lr_0 = 2.1308e-04
Loss = 9.5843e-02, PNorm = 83.3226, GNorm = 0.5448, lr_0 = 2.1293e-04
Loss = 9.3735e-02, PNorm = 83.3275, GNorm = 0.5027, lr_0 = 2.1279e-04
Loss = 8.1684e-02, PNorm = 83.3309, GNorm = 0.5584, lr_0 = 2.1264e-04
Loss = 9.2120e-02, PNorm = 83.3332, GNorm = 0.6394, lr_0 = 2.1250e-04
Loss = 9.9619e-02, PNorm = 83.3374, GNorm = 0.5961, lr_0 = 2.1235e-04
Loss = 9.3522e-02, PNorm = 83.3423, GNorm = 0.8027, lr_0 = 2.1221e-04
Loss = 9.2292e-02, PNorm = 83.3452, GNorm = 0.6157, lr_0 = 2.1206e-04
Loss = 9.6020e-02, PNorm = 83.3511, GNorm = 0.7473, lr_0 = 2.1191e-04
Loss = 9.4938e-02, PNorm = 83.3542, GNorm = 0.5765, lr_0 = 2.1177e-04
Loss = 1.0060e-01, PNorm = 83.3578, GNorm = 0.9730, lr_0 = 2.1162e-04
Loss = 1.0556e-01, PNorm = 83.3611, GNorm = 0.5690, lr_0 = 2.1148e-04
Loss = 9.5196e-02, PNorm = 83.3640, GNorm = 0.6056, lr_0 = 2.1133e-04
Loss = 9.7016e-02, PNorm = 83.3653, GNorm = 0.9833, lr_0 = 2.1119e-04
Loss = 9.2514e-02, PNorm = 83.3682, GNorm = 0.8274, lr_0 = 2.1104e-04
Loss = 9.4244e-02, PNorm = 83.3713, GNorm = 0.6117, lr_0 = 2.1090e-04
Loss = 8.2645e-02, PNorm = 83.3770, GNorm = 0.6155, lr_0 = 2.1076e-04
Loss = 9.4366e-02, PNorm = 83.3815, GNorm = 0.5228, lr_0 = 2.1061e-04
Loss = 9.7789e-02, PNorm = 83.3849, GNorm = 0.9089, lr_0 = 2.1047e-04
Loss = 8.6916e-02, PNorm = 83.3849, GNorm = 0.8808, lr_0 = 2.1032e-04
Loss = 9.5297e-02, PNorm = 83.3879, GNorm = 0.6193, lr_0 = 2.1018e-04
Loss = 9.5080e-02, PNorm = 83.3939, GNorm = 0.7782, lr_0 = 2.1003e-04
Loss = 8.0778e-02, PNorm = 83.3985, GNorm = 0.5433, lr_0 = 2.0989e-04
Loss = 9.6502e-02, PNorm = 83.4040, GNorm = 0.8302, lr_0 = 2.0975e-04
Loss = 9.2323e-02, PNorm = 83.4076, GNorm = 0.7072, lr_0 = 2.0960e-04
Validation mae = 0.227438
Epoch 21
Loss = 7.8783e-02, PNorm = 83.4131, GNorm = 0.5700, lr_0 = 2.0946e-04
Loss = 9.1393e-02, PNorm = 83.4157, GNorm = 0.4928, lr_0 = 2.0932e-04
Loss = 8.3374e-02, PNorm = 83.4196, GNorm = 0.9557, lr_0 = 2.0917e-04
Loss = 8.2237e-02, PNorm = 83.4228, GNorm = 0.6133, lr_0 = 2.0903e-04
Loss = 9.2752e-02, PNorm = 83.4249, GNorm = 0.6143, lr_0 = 2.0889e-04
Loss = 8.3937e-02, PNorm = 83.4268, GNorm = 0.5608, lr_0 = 2.0874e-04
Loss = 7.3212e-02, PNorm = 83.4309, GNorm = 0.4329, lr_0 = 2.0860e-04
Loss = 9.2759e-02, PNorm = 83.4384, GNorm = 0.5772, lr_0 = 2.0846e-04
Loss = 9.2188e-02, PNorm = 83.4445, GNorm = 0.9353, lr_0 = 2.0831e-04
Loss = 8.2399e-02, PNorm = 83.4488, GNorm = 0.7892, lr_0 = 2.0817e-04
Loss = 9.2789e-02, PNorm = 83.4517, GNorm = 0.6760, lr_0 = 2.0803e-04
Loss = 8.2175e-02, PNorm = 83.4567, GNorm = 0.6761, lr_0 = 2.0789e-04
Loss = 8.4146e-02, PNorm = 83.4597, GNorm = 0.5446, lr_0 = 2.0774e-04
Loss = 8.4465e-02, PNorm = 83.4621, GNorm = 0.5159, lr_0 = 2.0760e-04
Loss = 8.5431e-02, PNorm = 83.4628, GNorm = 0.4952, lr_0 = 2.0746e-04
Loss = 9.7200e-02, PNorm = 83.4669, GNorm = 0.7474, lr_0 = 2.0732e-04
Loss = 9.1332e-02, PNorm = 83.4694, GNorm = 0.7414, lr_0 = 2.0718e-04
Loss = 9.1425e-02, PNorm = 83.4730, GNorm = 0.5653, lr_0 = 2.0703e-04
Loss = 7.8564e-02, PNorm = 83.4754, GNorm = 0.6509, lr_0 = 2.0689e-04
Loss = 9.3050e-02, PNorm = 83.4799, GNorm = 0.6882, lr_0 = 2.0675e-04
Loss = 8.8565e-02, PNorm = 83.4831, GNorm = 0.6218, lr_0 = 2.0661e-04
Loss = 8.7971e-02, PNorm = 83.4885, GNorm = 0.6309, lr_0 = 2.0647e-04
Loss = 8.7510e-02, PNorm = 83.4933, GNorm = 0.6020, lr_0 = 2.0633e-04
Loss = 8.2750e-02, PNorm = 83.4965, GNorm = 0.6979, lr_0 = 2.0618e-04
Loss = 8.6831e-02, PNorm = 83.4982, GNorm = 0.6592, lr_0 = 2.0604e-04
Loss = 9.5580e-02, PNorm = 83.5024, GNorm = 0.7393, lr_0 = 2.0590e-04
Loss = 7.6940e-02, PNorm = 83.5107, GNorm = 0.5930, lr_0 = 2.0576e-04
Loss = 8.9859e-02, PNorm = 83.5180, GNorm = 0.6587, lr_0 = 2.0562e-04
Loss = 9.1899e-02, PNorm = 83.5206, GNorm = 0.4573, lr_0 = 2.0548e-04
Loss = 8.4347e-02, PNorm = 83.5241, GNorm = 0.7834, lr_0 = 2.0534e-04
Loss = 8.7858e-02, PNorm = 83.5299, GNorm = 0.6156, lr_0 = 2.0520e-04
Loss = 9.2689e-02, PNorm = 83.5355, GNorm = 0.6048, lr_0 = 2.0506e-04
Loss = 8.1529e-02, PNorm = 83.5374, GNorm = 0.6983, lr_0 = 2.0492e-04
Loss = 8.4292e-02, PNorm = 83.5393, GNorm = 0.6839, lr_0 = 2.0478e-04
Loss = 8.3464e-02, PNorm = 83.5418, GNorm = 0.7457, lr_0 = 2.0464e-04
Loss = 9.7106e-02, PNorm = 83.5453, GNorm = 0.6670, lr_0 = 2.0450e-04
Loss = 8.4461e-02, PNorm = 83.5476, GNorm = 0.5909, lr_0 = 2.0436e-04
Loss = 8.9153e-02, PNorm = 83.5487, GNorm = 0.6180, lr_0 = 2.0422e-04
Loss = 9.3965e-02, PNorm = 83.5507, GNorm = 0.7795, lr_0 = 2.0408e-04
Loss = 9.1645e-02, PNorm = 83.5518, GNorm = 0.8574, lr_0 = 2.0394e-04
Loss = 8.5829e-02, PNorm = 83.5529, GNorm = 0.7958, lr_0 = 2.0380e-04
Loss = 8.6750e-02, PNorm = 83.5584, GNorm = 0.6147, lr_0 = 2.0366e-04
Loss = 7.4476e-02, PNorm = 83.5647, GNorm = 0.6530, lr_0 = 2.0352e-04
Loss = 8.9982e-02, PNorm = 83.5677, GNorm = 0.5654, lr_0 = 2.0338e-04
Loss = 7.7753e-02, PNorm = 83.5717, GNorm = 0.5688, lr_0 = 2.0324e-04
Loss = 9.7811e-02, PNorm = 83.5764, GNorm = 1.0776, lr_0 = 2.0310e-04
Loss = 8.4370e-02, PNorm = 83.5807, GNorm = 0.6869, lr_0 = 2.0296e-04
Loss = 8.2159e-02, PNorm = 83.5832, GNorm = 0.7609, lr_0 = 2.0282e-04
Loss = 9.1182e-02, PNorm = 83.5871, GNorm = 0.8424, lr_0 = 2.0268e-04
Loss = 8.8640e-02, PNorm = 83.5934, GNorm = 0.7240, lr_0 = 2.0254e-04
Loss = 8.2817e-02, PNorm = 83.5991, GNorm = 0.6471, lr_0 = 2.0240e-04
Loss = 8.7188e-02, PNorm = 83.6052, GNorm = 0.6500, lr_0 = 2.0227e-04
Loss = 8.4599e-02, PNorm = 83.6066, GNorm = 0.5599, lr_0 = 2.0213e-04
Loss = 9.3425e-02, PNorm = 83.6082, GNorm = 0.7712, lr_0 = 2.0199e-04
Loss = 8.2815e-02, PNorm = 83.6116, GNorm = 0.7805, lr_0 = 2.0185e-04
Loss = 8.9149e-02, PNorm = 83.6155, GNorm = 0.6984, lr_0 = 2.0171e-04
Loss = 9.4772e-02, PNorm = 83.6171, GNorm = 0.8061, lr_0 = 2.0157e-04
Loss = 8.3383e-02, PNorm = 83.6214, GNorm = 0.6171, lr_0 = 2.0144e-04
Loss = 7.9121e-02, PNorm = 83.6283, GNorm = 0.7994, lr_0 = 2.0130e-04
Loss = 8.6668e-02, PNorm = 83.6333, GNorm = 0.5556, lr_0 = 2.0116e-04
Loss = 1.0220e-01, PNorm = 83.6373, GNorm = 0.6875, lr_0 = 2.0102e-04
Loss = 8.8192e-02, PNorm = 83.6416, GNorm = 0.6740, lr_0 = 2.0088e-04
Loss = 8.9894e-02, PNorm = 83.6444, GNorm = 0.6939, lr_0 = 2.0075e-04
Loss = 7.6159e-02, PNorm = 83.6473, GNorm = 0.5858, lr_0 = 2.0061e-04
Loss = 9.4754e-02, PNorm = 83.6498, GNorm = 0.6374, lr_0 = 2.0047e-04
Loss = 9.0830e-02, PNorm = 83.6552, GNorm = 0.5795, lr_0 = 2.0033e-04
Loss = 1.0689e-01, PNorm = 83.6585, GNorm = 0.7497, lr_0 = 2.0020e-04
Loss = 7.7879e-02, PNorm = 83.6627, GNorm = 0.6330, lr_0 = 2.0006e-04
Loss = 9.5715e-02, PNorm = 83.6666, GNorm = 0.4612, lr_0 = 1.9992e-04
Loss = 8.8206e-02, PNorm = 83.6720, GNorm = 0.6657, lr_0 = 1.9979e-04
Loss = 8.4198e-02, PNorm = 83.6748, GNorm = 0.5091, lr_0 = 1.9965e-04
Loss = 8.5553e-02, PNorm = 83.6802, GNorm = 0.7915, lr_0 = 1.9951e-04
Loss = 9.6534e-02, PNorm = 83.6831, GNorm = 0.7425, lr_0 = 1.9938e-04
Loss = 8.8916e-02, PNorm = 83.6885, GNorm = 0.9721, lr_0 = 1.9924e-04
Loss = 9.1425e-02, PNorm = 83.6917, GNorm = 0.5807, lr_0 = 1.9910e-04
Loss = 8.3535e-02, PNorm = 83.6954, GNorm = 0.5983, lr_0 = 1.9897e-04
Loss = 9.7104e-02, PNorm = 83.6979, GNorm = 0.8304, lr_0 = 1.9883e-04
Loss = 1.0173e-01, PNorm = 83.7016, GNorm = 0.9044, lr_0 = 1.9869e-04
Loss = 9.7535e-02, PNorm = 83.7072, GNorm = 0.5150, lr_0 = 1.9856e-04
Loss = 7.9882e-02, PNorm = 83.7096, GNorm = 0.5328, lr_0 = 1.9842e-04
Loss = 9.4471e-02, PNorm = 83.7107, GNorm = 0.5588, lr_0 = 1.9829e-04
Loss = 9.4656e-02, PNorm = 83.7152, GNorm = 0.6252, lr_0 = 1.9815e-04
Loss = 9.3347e-02, PNorm = 83.7191, GNorm = 0.5998, lr_0 = 1.9801e-04
Loss = 8.1436e-02, PNorm = 83.7199, GNorm = 0.7560, lr_0 = 1.9788e-04
Loss = 8.7555e-02, PNorm = 83.7241, GNorm = 0.6231, lr_0 = 1.9774e-04
Loss = 8.2234e-02, PNorm = 83.7306, GNorm = 0.7090, lr_0 = 1.9761e-04
Loss = 8.4939e-02, PNorm = 83.7343, GNorm = 0.6761, lr_0 = 1.9747e-04
Loss = 9.4192e-02, PNorm = 83.7384, GNorm = 0.7335, lr_0 = 1.9734e-04
Loss = 8.6543e-02, PNorm = 83.7421, GNorm = 0.6475, lr_0 = 1.9720e-04
Loss = 9.1016e-02, PNorm = 83.7459, GNorm = 0.6199, lr_0 = 1.9707e-04
Loss = 8.9717e-02, PNorm = 83.7502, GNorm = 0.8039, lr_0 = 1.9693e-04
Loss = 8.4591e-02, PNorm = 83.7528, GNorm = 0.4912, lr_0 = 1.9680e-04
Loss = 9.1384e-02, PNorm = 83.7553, GNorm = 0.5231, lr_0 = 1.9666e-04
Loss = 9.0662e-02, PNorm = 83.7593, GNorm = 0.8363, lr_0 = 1.9653e-04
Loss = 9.2868e-02, PNorm = 83.7626, GNorm = 0.8944, lr_0 = 1.9639e-04
Loss = 1.1056e-01, PNorm = 83.7636, GNorm = 0.5458, lr_0 = 1.9626e-04
Loss = 9.0628e-02, PNorm = 83.7666, GNorm = 0.6538, lr_0 = 1.9612e-04
Loss = 8.8265e-02, PNorm = 83.7679, GNorm = 0.6197, lr_0 = 1.9599e-04
Loss = 7.9260e-02, PNorm = 83.7730, GNorm = 0.6319, lr_0 = 1.9585e-04
Loss = 8.4727e-02, PNorm = 83.7762, GNorm = 0.8034, lr_0 = 1.9572e-04
Loss = 9.8476e-02, PNorm = 83.7779, GNorm = 0.7920, lr_0 = 1.9559e-04
Loss = 8.8217e-02, PNorm = 83.7812, GNorm = 0.7887, lr_0 = 1.9545e-04
Loss = 9.2739e-02, PNorm = 83.7844, GNorm = 0.5147, lr_0 = 1.9532e-04
Loss = 8.2684e-02, PNorm = 83.7894, GNorm = 0.7356, lr_0 = 1.9518e-04
Loss = 9.5032e-02, PNorm = 83.7916, GNorm = 0.6488, lr_0 = 1.9505e-04
Loss = 8.5388e-02, PNorm = 83.7924, GNorm = 0.4998, lr_0 = 1.9492e-04
Loss = 8.8487e-02, PNorm = 83.7956, GNorm = 0.8361, lr_0 = 1.9478e-04
Loss = 8.1385e-02, PNorm = 83.7972, GNorm = 0.7079, lr_0 = 1.9465e-04
Loss = 7.5835e-02, PNorm = 83.7982, GNorm = 0.4994, lr_0 = 1.9452e-04
Loss = 8.5102e-02, PNorm = 83.8009, GNorm = 0.6598, lr_0 = 1.9438e-04
Loss = 9.0081e-02, PNorm = 83.8069, GNorm = 1.0225, lr_0 = 1.9425e-04
Loss = 9.8531e-02, PNorm = 83.8101, GNorm = 0.8066, lr_0 = 1.9412e-04
Loss = 8.7127e-02, PNorm = 83.8170, GNorm = 0.5937, lr_0 = 1.9398e-04
Loss = 9.4931e-02, PNorm = 83.8235, GNorm = 0.5643, lr_0 = 1.9385e-04
Loss = 9.2850e-02, PNorm = 83.8283, GNorm = 0.6690, lr_0 = 1.9372e-04
Loss = 8.8040e-02, PNorm = 83.8314, GNorm = 0.8427, lr_0 = 1.9359e-04
Loss = 9.7958e-02, PNorm = 83.8354, GNorm = 0.6436, lr_0 = 1.9345e-04
Loss = 1.0248e-01, PNorm = 83.8370, GNorm = 0.6774, lr_0 = 1.9332e-04
Loss = 8.4992e-02, PNorm = 83.8402, GNorm = 0.5815, lr_0 = 1.9319e-04
Loss = 9.0509e-02, PNorm = 83.8419, GNorm = 0.6029, lr_0 = 1.9306e-04
Validation mae = 0.228288
Epoch 22
Loss = 7.2666e-02, PNorm = 83.8439, GNorm = 0.5460, lr_0 = 1.9292e-04
Loss = 9.2277e-02, PNorm = 83.8445, GNorm = 0.9000, lr_0 = 1.9279e-04
Loss = 8.0183e-02, PNorm = 83.8461, GNorm = 0.6940, lr_0 = 1.9266e-04
Loss = 8.2780e-02, PNorm = 83.8505, GNorm = 0.6191, lr_0 = 1.9253e-04
Loss = 9.3747e-02, PNorm = 83.8574, GNorm = 0.6312, lr_0 = 1.9240e-04
Loss = 7.7695e-02, PNorm = 83.8603, GNorm = 0.5382, lr_0 = 1.9226e-04
Loss = 8.4527e-02, PNorm = 83.8641, GNorm = 0.6876, lr_0 = 1.9213e-04
Loss = 7.3629e-02, PNorm = 83.8693, GNorm = 0.5446, lr_0 = 1.9200e-04
Loss = 8.3210e-02, PNorm = 83.8743, GNorm = 0.7600, lr_0 = 1.9187e-04
Loss = 8.7247e-02, PNorm = 83.8773, GNorm = 0.6877, lr_0 = 1.9174e-04
Loss = 8.6092e-02, PNorm = 83.8816, GNorm = 0.5916, lr_0 = 1.9161e-04
Loss = 9.3273e-02, PNorm = 83.8855, GNorm = 0.7176, lr_0 = 1.9148e-04
Loss = 9.0112e-02, PNorm = 83.8889, GNorm = 0.8621, lr_0 = 1.9134e-04
Loss = 8.5121e-02, PNorm = 83.8903, GNorm = 0.6316, lr_0 = 1.9121e-04
Loss = 8.4528e-02, PNorm = 83.8922, GNorm = 0.7725, lr_0 = 1.9108e-04
Loss = 8.1794e-02, PNorm = 83.8938, GNorm = 0.5402, lr_0 = 1.9095e-04
Loss = 8.5463e-02, PNorm = 83.8983, GNorm = 0.5397, lr_0 = 1.9082e-04
Loss = 7.5982e-02, PNorm = 83.9013, GNorm = 0.5403, lr_0 = 1.9069e-04
Loss = 7.9913e-02, PNorm = 83.9031, GNorm = 0.4935, lr_0 = 1.9056e-04
Loss = 9.1618e-02, PNorm = 83.9064, GNorm = 0.7604, lr_0 = 1.9043e-04
Loss = 8.1919e-02, PNorm = 83.9117, GNorm = 0.5145, lr_0 = 1.9030e-04
Loss = 8.3067e-02, PNorm = 83.9176, GNorm = 0.6058, lr_0 = 1.9017e-04
Loss = 8.9932e-02, PNorm = 83.9206, GNorm = 0.7679, lr_0 = 1.9004e-04
Loss = 8.3328e-02, PNorm = 83.9238, GNorm = 0.6624, lr_0 = 1.8991e-04
Loss = 8.0002e-02, PNorm = 83.9268, GNorm = 0.9044, lr_0 = 1.8978e-04
Loss = 8.6879e-02, PNorm = 83.9313, GNorm = 0.6153, lr_0 = 1.8965e-04
Loss = 9.4119e-02, PNorm = 83.9347, GNorm = 1.0848, lr_0 = 1.8952e-04
Loss = 8.0708e-02, PNorm = 83.9423, GNorm = 0.5811, lr_0 = 1.8939e-04
Loss = 8.2118e-02, PNorm = 83.9447, GNorm = 0.5107, lr_0 = 1.8926e-04
Loss = 7.7428e-02, PNorm = 83.9495, GNorm = 0.4837, lr_0 = 1.8913e-04
Loss = 8.5666e-02, PNorm = 83.9540, GNorm = 0.6390, lr_0 = 1.8900e-04
Loss = 9.5276e-02, PNorm = 83.9557, GNorm = 1.1428, lr_0 = 1.8887e-04
Loss = 8.5127e-02, PNorm = 83.9610, GNorm = 0.6222, lr_0 = 1.8874e-04
Loss = 8.4668e-02, PNorm = 83.9635, GNorm = 0.7828, lr_0 = 1.8861e-04
Loss = 8.1511e-02, PNorm = 83.9645, GNorm = 0.7043, lr_0 = 1.8848e-04
Loss = 8.2968e-02, PNorm = 83.9687, GNorm = 0.8209, lr_0 = 1.8835e-04
Loss = 9.2518e-02, PNorm = 83.9742, GNorm = 0.5712, lr_0 = 1.8822e-04
Loss = 7.9936e-02, PNorm = 83.9781, GNorm = 0.7450, lr_0 = 1.8809e-04
Loss = 8.2187e-02, PNorm = 83.9806, GNorm = 0.6391, lr_0 = 1.8797e-04
Loss = 8.7368e-02, PNorm = 83.9826, GNorm = 0.7500, lr_0 = 1.8784e-04
Loss = 8.5474e-02, PNorm = 83.9870, GNorm = 0.5971, lr_0 = 1.8771e-04
Loss = 8.1847e-02, PNorm = 83.9905, GNorm = 0.8549, lr_0 = 1.8758e-04
Loss = 7.0886e-02, PNorm = 83.9931, GNorm = 0.5791, lr_0 = 1.8745e-04
Loss = 8.0831e-02, PNorm = 83.9962, GNorm = 0.9846, lr_0 = 1.8732e-04
Loss = 7.8494e-02, PNorm = 83.9996, GNorm = 0.5350, lr_0 = 1.8719e-04
Loss = 8.0482e-02, PNorm = 84.0042, GNorm = 0.6385, lr_0 = 1.8707e-04
Loss = 1.0698e-01, PNorm = 84.0051, GNorm = 0.6429, lr_0 = 1.8694e-04
Loss = 8.7921e-02, PNorm = 84.0086, GNorm = 0.5673, lr_0 = 1.8681e-04
Loss = 8.4732e-02, PNorm = 84.0117, GNorm = 0.6895, lr_0 = 1.8668e-04
Loss = 7.1393e-02, PNorm = 84.0144, GNorm = 0.7498, lr_0 = 1.8655e-04
Loss = 9.7665e-02, PNorm = 84.0174, GNorm = 0.5879, lr_0 = 1.8643e-04
Loss = 8.9413e-02, PNorm = 84.0188, GNorm = 0.5126, lr_0 = 1.8630e-04
Loss = 8.1430e-02, PNorm = 84.0217, GNorm = 0.6171, lr_0 = 1.8617e-04
Loss = 8.9457e-02, PNorm = 84.0251, GNorm = 0.6669, lr_0 = 1.8604e-04
Loss = 9.3304e-02, PNorm = 84.0286, GNorm = 0.5172, lr_0 = 1.8592e-04
Loss = 8.3006e-02, PNorm = 84.0311, GNorm = 0.6417, lr_0 = 1.8579e-04
Loss = 8.7079e-02, PNorm = 84.0340, GNorm = 0.8409, lr_0 = 1.8566e-04
Loss = 9.3959e-02, PNorm = 84.0361, GNorm = 0.6237, lr_0 = 1.8553e-04
Loss = 8.3806e-02, PNorm = 84.0384, GNorm = 0.5662, lr_0 = 1.8541e-04
Loss = 8.2150e-02, PNorm = 84.0430, GNorm = 0.4878, lr_0 = 1.8528e-04
Loss = 9.1701e-02, PNorm = 84.0482, GNorm = 0.5030, lr_0 = 1.8515e-04
Loss = 8.8514e-02, PNorm = 84.0504, GNorm = 0.5967, lr_0 = 1.8503e-04
Loss = 8.4952e-02, PNorm = 84.0555, GNorm = 0.4979, lr_0 = 1.8490e-04
Loss = 8.9051e-02, PNorm = 84.0607, GNorm = 0.5278, lr_0 = 1.8477e-04
Loss = 8.2212e-02, PNorm = 84.0645, GNorm = 0.6566, lr_0 = 1.8465e-04
Loss = 9.9468e-02, PNorm = 84.0645, GNorm = 0.5421, lr_0 = 1.8452e-04
Loss = 8.7234e-02, PNorm = 84.0665, GNorm = 0.5620, lr_0 = 1.8439e-04
Loss = 9.0258e-02, PNorm = 84.0692, GNorm = 0.6294, lr_0 = 1.8427e-04
Loss = 7.8625e-02, PNorm = 84.0723, GNorm = 0.6387, lr_0 = 1.8414e-04
Loss = 9.3203e-02, PNorm = 84.0749, GNorm = 0.6164, lr_0 = 1.8401e-04
Loss = 9.0151e-02, PNorm = 84.0773, GNorm = 0.5281, lr_0 = 1.8389e-04
Loss = 9.2439e-02, PNorm = 84.0800, GNorm = 0.6181, lr_0 = 1.8376e-04
Loss = 8.5379e-02, PNorm = 84.0841, GNorm = 0.7395, lr_0 = 1.8364e-04
Loss = 9.9853e-02, PNorm = 84.0862, GNorm = 0.9104, lr_0 = 1.8351e-04
Loss = 9.4722e-02, PNorm = 84.0890, GNorm = 0.6877, lr_0 = 1.8338e-04
Loss = 8.6918e-02, PNorm = 84.0921, GNorm = 0.5160, lr_0 = 1.8326e-04
Loss = 8.6639e-02, PNorm = 84.0931, GNorm = 0.5942, lr_0 = 1.8313e-04
Loss = 8.5075e-02, PNorm = 84.0969, GNorm = 0.6256, lr_0 = 1.8301e-04
Loss = 7.9653e-02, PNorm = 84.0998, GNorm = 0.5059, lr_0 = 1.8288e-04
Loss = 9.4736e-02, PNorm = 84.1028, GNorm = 0.7101, lr_0 = 1.8276e-04
Loss = 9.5605e-02, PNorm = 84.1053, GNorm = 0.6698, lr_0 = 1.8263e-04
Loss = 8.0627e-02, PNorm = 84.1063, GNorm = 0.7384, lr_0 = 1.8251e-04
Loss = 7.9869e-02, PNorm = 84.1097, GNorm = 0.4958, lr_0 = 1.8238e-04
Loss = 8.5467e-02, PNorm = 84.1139, GNorm = 0.8604, lr_0 = 1.8226e-04
Loss = 8.5268e-02, PNorm = 84.1190, GNorm = 0.4913, lr_0 = 1.8213e-04
Loss = 8.9728e-02, PNorm = 84.1213, GNorm = 0.7595, lr_0 = 1.8201e-04
Loss = 7.9839e-02, PNorm = 84.1225, GNorm = 0.6306, lr_0 = 1.8188e-04
Loss = 8.8618e-02, PNorm = 84.1264, GNorm = 0.7197, lr_0 = 1.8176e-04
Loss = 8.6572e-02, PNorm = 84.1320, GNorm = 0.6236, lr_0 = 1.8163e-04
Loss = 7.2581e-02, PNorm = 84.1362, GNorm = 0.5417, lr_0 = 1.8151e-04
Loss = 8.5781e-02, PNorm = 84.1385, GNorm = 0.6117, lr_0 = 1.8138e-04
Loss = 8.8425e-02, PNorm = 84.1405, GNorm = 0.6679, lr_0 = 1.8126e-04
Loss = 9.2413e-02, PNorm = 84.1431, GNorm = 0.5682, lr_0 = 1.8114e-04
Loss = 8.9905e-02, PNorm = 84.1449, GNorm = 0.7807, lr_0 = 1.8101e-04
Loss = 9.0713e-02, PNorm = 84.1448, GNorm = 0.6102, lr_0 = 1.8089e-04
Loss = 8.1200e-02, PNorm = 84.1481, GNorm = 0.5563, lr_0 = 1.8076e-04
Loss = 8.9236e-02, PNorm = 84.1511, GNorm = 0.5573, lr_0 = 1.8064e-04
Loss = 8.3908e-02, PNorm = 84.1551, GNorm = 0.5833, lr_0 = 1.8052e-04
Loss = 9.3400e-02, PNorm = 84.1602, GNorm = 0.8382, lr_0 = 1.8039e-04
Loss = 8.1857e-02, PNorm = 84.1596, GNorm = 0.7298, lr_0 = 1.8027e-04
Loss = 8.8548e-02, PNorm = 84.1605, GNorm = 0.6551, lr_0 = 1.8015e-04
Loss = 9.1129e-02, PNorm = 84.1646, GNorm = 1.0419, lr_0 = 1.8002e-04
Loss = 7.8486e-02, PNorm = 84.1677, GNorm = 0.7180, lr_0 = 1.7990e-04
Loss = 7.5168e-02, PNorm = 84.1700, GNorm = 0.6859, lr_0 = 1.7978e-04
Loss = 9.2356e-02, PNorm = 84.1727, GNorm = 0.6089, lr_0 = 1.7965e-04
Loss = 8.9427e-02, PNorm = 84.1751, GNorm = 0.6112, lr_0 = 1.7953e-04
Loss = 9.9399e-02, PNorm = 84.1796, GNorm = 0.7983, lr_0 = 1.7941e-04
Loss = 8.1467e-02, PNorm = 84.1851, GNorm = 0.7170, lr_0 = 1.7928e-04
Loss = 9.7384e-02, PNorm = 84.1883, GNorm = 0.6110, lr_0 = 1.7916e-04
Loss = 9.8600e-02, PNorm = 84.1937, GNorm = 0.6304, lr_0 = 1.7904e-04
Loss = 8.9780e-02, PNorm = 84.1968, GNorm = 0.6098, lr_0 = 1.7892e-04
Loss = 8.7876e-02, PNorm = 84.1977, GNorm = 0.7751, lr_0 = 1.7879e-04
Loss = 9.5392e-02, PNorm = 84.2004, GNorm = 0.6373, lr_0 = 1.7867e-04
Loss = 8.3915e-02, PNorm = 84.2024, GNorm = 0.6614, lr_0 = 1.7855e-04
Loss = 7.7948e-02, PNorm = 84.2052, GNorm = 0.4647, lr_0 = 1.7843e-04
Loss = 9.4214e-02, PNorm = 84.2071, GNorm = 0.6766, lr_0 = 1.7830e-04
Loss = 9.2825e-02, PNorm = 84.2105, GNorm = 0.9298, lr_0 = 1.7818e-04
Loss = 9.3064e-02, PNorm = 84.2152, GNorm = 0.7335, lr_0 = 1.7806e-04
Loss = 8.2099e-02, PNorm = 84.2194, GNorm = 0.9613, lr_0 = 1.7794e-04
Loss = 1.0070e-01, PNorm = 84.2235, GNorm = 0.7482, lr_0 = 1.7782e-04
Validation mae = 0.227198
Epoch 23
Loss = 7.7647e-02, PNorm = 84.2267, GNorm = 0.6615, lr_0 = 1.7769e-04
Loss = 6.7128e-02, PNorm = 84.2292, GNorm = 0.5980, lr_0 = 1.7757e-04
Loss = 8.4768e-02, PNorm = 84.2309, GNorm = 0.6550, lr_0 = 1.7745e-04
Loss = 7.5467e-02, PNorm = 84.2345, GNorm = 0.7785, lr_0 = 1.7733e-04
Loss = 8.7835e-02, PNorm = 84.2377, GNorm = 0.7104, lr_0 = 1.7721e-04
Loss = 9.7321e-02, PNorm = 84.2397, GNorm = 0.7341, lr_0 = 1.7709e-04
Loss = 8.4496e-02, PNorm = 84.2436, GNorm = 0.5749, lr_0 = 1.7696e-04
Loss = 9.0312e-02, PNorm = 84.2475, GNorm = 0.6140, lr_0 = 1.7684e-04
Loss = 8.6395e-02, PNorm = 84.2498, GNorm = 0.5900, lr_0 = 1.7672e-04
Loss = 8.6661e-02, PNorm = 84.2506, GNorm = 0.5432, lr_0 = 1.7660e-04
Loss = 7.4790e-02, PNorm = 84.2516, GNorm = 0.6249, lr_0 = 1.7648e-04
Loss = 8.4766e-02, PNorm = 84.2554, GNorm = 0.6287, lr_0 = 1.7636e-04
Loss = 8.5004e-02, PNorm = 84.2581, GNorm = 0.5495, lr_0 = 1.7624e-04
Loss = 8.4822e-02, PNorm = 84.2568, GNorm = 0.6272, lr_0 = 1.7612e-04
Loss = 8.1491e-02, PNorm = 84.2598, GNorm = 0.8387, lr_0 = 1.7600e-04
Loss = 8.3049e-02, PNorm = 84.2638, GNorm = 0.5338, lr_0 = 1.7588e-04
Loss = 8.3187e-02, PNorm = 84.2670, GNorm = 0.4984, lr_0 = 1.7576e-04
Loss = 8.0010e-02, PNorm = 84.2713, GNorm = 0.6836, lr_0 = 1.7564e-04
Loss = 7.8916e-02, PNorm = 84.2753, GNorm = 0.6020, lr_0 = 1.7552e-04
Loss = 9.2791e-02, PNorm = 84.2779, GNorm = 0.9253, lr_0 = 1.7540e-04
Loss = 7.5075e-02, PNorm = 84.2801, GNorm = 0.5952, lr_0 = 1.7528e-04
Loss = 8.7626e-02, PNorm = 84.2811, GNorm = 0.8049, lr_0 = 1.7516e-04
Loss = 8.4465e-02, PNorm = 84.2840, GNorm = 0.6036, lr_0 = 1.7504e-04
Loss = 6.5222e-02, PNorm = 84.2882, GNorm = 0.5953, lr_0 = 1.7492e-04
Loss = 8.6577e-02, PNorm = 84.2907, GNorm = 0.6507, lr_0 = 1.7480e-04
Loss = 9.7369e-02, PNorm = 84.2920, GNorm = 0.7544, lr_0 = 1.7468e-04
Loss = 8.3816e-02, PNorm = 84.2944, GNorm = 0.6152, lr_0 = 1.7456e-04
Loss = 8.2040e-02, PNorm = 84.2960, GNorm = 0.5593, lr_0 = 1.7444e-04
Loss = 7.8864e-02, PNorm = 84.2996, GNorm = 0.7791, lr_0 = 1.7432e-04
Loss = 8.0629e-02, PNorm = 84.3029, GNorm = 0.8491, lr_0 = 1.7420e-04
Loss = 9.3453e-02, PNorm = 84.3051, GNorm = 0.6343, lr_0 = 1.7408e-04
Loss = 8.6147e-02, PNorm = 84.3074, GNorm = 0.5501, lr_0 = 1.7396e-04
Loss = 9.3533e-02, PNorm = 84.3091, GNorm = 0.7347, lr_0 = 1.7384e-04
Loss = 8.8027e-02, PNorm = 84.3120, GNorm = 0.8862, lr_0 = 1.7372e-04
Loss = 7.5768e-02, PNorm = 84.3163, GNorm = 0.5345, lr_0 = 1.7360e-04
Loss = 7.8991e-02, PNorm = 84.3218, GNorm = 0.6257, lr_0 = 1.7348e-04
Loss = 8.7832e-02, PNorm = 84.3261, GNorm = 0.5406, lr_0 = 1.7336e-04
Loss = 8.5208e-02, PNorm = 84.3298, GNorm = 0.5300, lr_0 = 1.7325e-04
Loss = 8.1574e-02, PNorm = 84.3318, GNorm = 0.5817, lr_0 = 1.7313e-04
Loss = 7.5180e-02, PNorm = 84.3330, GNorm = 0.5268, lr_0 = 1.7301e-04
Loss = 8.7402e-02, PNorm = 84.3342, GNorm = 0.6892, lr_0 = 1.7289e-04
Loss = 8.7218e-02, PNorm = 84.3378, GNorm = 0.7289, lr_0 = 1.7277e-04
Loss = 8.7750e-02, PNorm = 84.3404, GNorm = 0.9501, lr_0 = 1.7265e-04
Loss = 8.4592e-02, PNorm = 84.3434, GNorm = 0.6167, lr_0 = 1.7253e-04
Loss = 9.1377e-02, PNorm = 84.3481, GNorm = 0.7235, lr_0 = 1.7242e-04
Loss = 8.0327e-02, PNorm = 84.3525, GNorm = 0.7550, lr_0 = 1.7230e-04
Loss = 7.0460e-02, PNorm = 84.3537, GNorm = 0.4737, lr_0 = 1.7218e-04
Loss = 7.2556e-02, PNorm = 84.3578, GNorm = 0.7699, lr_0 = 1.7206e-04
Loss = 9.4825e-02, PNorm = 84.3599, GNorm = 0.7473, lr_0 = 1.7194e-04
Loss = 1.0072e-01, PNorm = 84.3621, GNorm = 0.8462, lr_0 = 1.7183e-04
Loss = 9.1331e-02, PNorm = 84.3626, GNorm = 0.9716, lr_0 = 1.7171e-04
Loss = 7.8626e-02, PNorm = 84.3658, GNorm = 0.6034, lr_0 = 1.7159e-04
Loss = 7.5979e-02, PNorm = 84.3680, GNorm = 0.7178, lr_0 = 1.7147e-04
Loss = 8.4304e-02, PNorm = 84.3699, GNorm = 0.5310, lr_0 = 1.7136e-04
Loss = 9.0386e-02, PNorm = 84.3710, GNorm = 0.6556, lr_0 = 1.7124e-04
Loss = 7.9856e-02, PNorm = 84.3754, GNorm = 0.5387, lr_0 = 1.7112e-04
Loss = 7.2859e-02, PNorm = 84.3806, GNorm = 0.8118, lr_0 = 1.7100e-04
Loss = 8.2378e-02, PNorm = 84.3834, GNorm = 0.6104, lr_0 = 1.7089e-04
Loss = 9.0241e-02, PNorm = 84.3868, GNorm = 0.7865, lr_0 = 1.7077e-04
Loss = 9.3692e-02, PNorm = 84.3897, GNorm = 0.7127, lr_0 = 1.7065e-04
Loss = 7.5224e-02, PNorm = 84.3931, GNorm = 0.6467, lr_0 = 1.7054e-04
Loss = 8.4906e-02, PNorm = 84.3968, GNorm = 0.7033, lr_0 = 1.7042e-04
Loss = 7.6160e-02, PNorm = 84.3999, GNorm = 0.5143, lr_0 = 1.7030e-04
Loss = 7.8523e-02, PNorm = 84.4037, GNorm = 0.6672, lr_0 = 1.7019e-04
Loss = 8.5581e-02, PNorm = 84.4057, GNorm = 0.7167, lr_0 = 1.7007e-04
Loss = 8.5541e-02, PNorm = 84.4069, GNorm = 0.7247, lr_0 = 1.6995e-04
Loss = 9.2417e-02, PNorm = 84.4103, GNorm = 0.5931, lr_0 = 1.6984e-04
Loss = 9.8455e-02, PNorm = 84.4152, GNorm = 0.7264, lr_0 = 1.6972e-04
Loss = 6.8772e-02, PNorm = 84.4172, GNorm = 0.5378, lr_0 = 1.6960e-04
Loss = 8.1324e-02, PNorm = 84.4210, GNorm = 0.6632, lr_0 = 1.6949e-04
Loss = 8.8682e-02, PNorm = 84.4241, GNorm = 0.6186, lr_0 = 1.6937e-04
Loss = 7.6915e-02, PNorm = 84.4246, GNorm = 0.6536, lr_0 = 1.6926e-04
Loss = 8.1900e-02, PNorm = 84.4284, GNorm = 0.7300, lr_0 = 1.6914e-04
Loss = 8.1363e-02, PNorm = 84.4322, GNorm = 0.6380, lr_0 = 1.6902e-04
Loss = 8.3325e-02, PNorm = 84.4336, GNorm = 0.5132, lr_0 = 1.6891e-04
Loss = 8.8317e-02, PNorm = 84.4350, GNorm = 0.5718, lr_0 = 1.6879e-04
Loss = 9.1363e-02, PNorm = 84.4386, GNorm = 0.6139, lr_0 = 1.6868e-04
Loss = 7.8862e-02, PNorm = 84.4424, GNorm = 0.3871, lr_0 = 1.6856e-04
Loss = 9.4080e-02, PNorm = 84.4433, GNorm = 0.5074, lr_0 = 1.6845e-04
Loss = 8.2773e-02, PNorm = 84.4436, GNorm = 0.5514, lr_0 = 1.6833e-04
Loss = 8.0728e-02, PNorm = 84.4484, GNorm = 0.5720, lr_0 = 1.6821e-04
Loss = 7.2517e-02, PNorm = 84.4505, GNorm = 0.9676, lr_0 = 1.6810e-04
Loss = 8.5853e-02, PNorm = 84.4518, GNorm = 0.5135, lr_0 = 1.6798e-04
Loss = 8.7015e-02, PNorm = 84.4545, GNorm = 0.9430, lr_0 = 1.6787e-04
Loss = 8.1905e-02, PNorm = 84.4564, GNorm = 0.5822, lr_0 = 1.6775e-04
Loss = 8.9972e-02, PNorm = 84.4617, GNorm = 0.7373, lr_0 = 1.6764e-04
Loss = 9.5218e-02, PNorm = 84.4661, GNorm = 0.7676, lr_0 = 1.6752e-04
Loss = 9.4538e-02, PNorm = 84.4689, GNorm = 0.7904, lr_0 = 1.6741e-04
Loss = 8.1654e-02, PNorm = 84.4718, GNorm = 0.6175, lr_0 = 1.6729e-04
Loss = 9.1990e-02, PNorm = 84.4764, GNorm = 0.5879, lr_0 = 1.6718e-04
Loss = 8.8404e-02, PNorm = 84.4798, GNorm = 0.6321, lr_0 = 1.6707e-04
Loss = 7.4084e-02, PNorm = 84.4809, GNorm = 0.5244, lr_0 = 1.6695e-04
Loss = 8.7472e-02, PNorm = 84.4848, GNorm = 0.7114, lr_0 = 1.6684e-04
Loss = 9.5429e-02, PNorm = 84.4878, GNorm = 0.8912, lr_0 = 1.6672e-04
Loss = 8.1742e-02, PNorm = 84.4922, GNorm = 0.7796, lr_0 = 1.6661e-04
Loss = 8.1216e-02, PNorm = 84.4967, GNorm = 0.7548, lr_0 = 1.6649e-04
Loss = 9.2954e-02, PNorm = 84.5000, GNorm = 0.6856, lr_0 = 1.6638e-04
Loss = 8.0708e-02, PNorm = 84.5009, GNorm = 0.6615, lr_0 = 1.6627e-04
Loss = 9.1497e-02, PNorm = 84.5034, GNorm = 0.6491, lr_0 = 1.6615e-04
Loss = 8.9972e-02, PNorm = 84.5081, GNorm = 0.6448, lr_0 = 1.6604e-04
Loss = 7.3127e-02, PNorm = 84.5126, GNorm = 0.6754, lr_0 = 1.6592e-04
Loss = 8.7011e-02, PNorm = 84.5134, GNorm = 0.5167, lr_0 = 1.6581e-04
Loss = 8.8861e-02, PNorm = 84.5170, GNorm = 0.6756, lr_0 = 1.6570e-04
Loss = 8.9945e-02, PNorm = 84.5208, GNorm = 0.6058, lr_0 = 1.6558e-04
Loss = 8.8783e-02, PNorm = 84.5218, GNorm = 0.6706, lr_0 = 1.6547e-04
Loss = 9.0321e-02, PNorm = 84.5244, GNorm = 0.7948, lr_0 = 1.6536e-04
Loss = 9.7866e-02, PNorm = 84.5286, GNorm = 0.6870, lr_0 = 1.6524e-04
Loss = 9.5667e-02, PNorm = 84.5315, GNorm = 0.7958, lr_0 = 1.6513e-04
Loss = 7.7198e-02, PNorm = 84.5355, GNorm = 0.6077, lr_0 = 1.6502e-04
Loss = 8.3380e-02, PNorm = 84.5387, GNorm = 0.7030, lr_0 = 1.6490e-04
Loss = 8.0543e-02, PNorm = 84.5432, GNorm = 0.5461, lr_0 = 1.6479e-04
Loss = 7.2927e-02, PNorm = 84.5458, GNorm = 0.5095, lr_0 = 1.6468e-04
Loss = 7.5835e-02, PNorm = 84.5490, GNorm = 0.4862, lr_0 = 1.6457e-04
Loss = 8.0787e-02, PNorm = 84.5509, GNorm = 0.6252, lr_0 = 1.6445e-04
Loss = 6.9978e-02, PNorm = 84.5542, GNorm = 0.6461, lr_0 = 1.6434e-04
Loss = 8.4812e-02, PNorm = 84.5564, GNorm = 0.6991, lr_0 = 1.6423e-04
Loss = 8.2303e-02, PNorm = 84.5585, GNorm = 0.6704, lr_0 = 1.6412e-04
Loss = 8.3606e-02, PNorm = 84.5619, GNorm = 0.6311, lr_0 = 1.6400e-04
Loss = 8.2607e-02, PNorm = 84.5647, GNorm = 0.6288, lr_0 = 1.6389e-04
Loss = 8.7393e-02, PNorm = 84.5672, GNorm = 0.8263, lr_0 = 1.6378e-04
Validation mae = 0.229212
Epoch 24
Loss = 7.4157e-02, PNorm = 84.5665, GNorm = 0.5216, lr_0 = 1.6367e-04
Loss = 7.6759e-02, PNorm = 84.5683, GNorm = 0.8426, lr_0 = 1.6355e-04
Loss = 7.9993e-02, PNorm = 84.5701, GNorm = 0.8197, lr_0 = 1.6344e-04
Loss = 7.6884e-02, PNorm = 84.5721, GNorm = 0.8381, lr_0 = 1.6333e-04
Loss = 8.9914e-02, PNorm = 84.5762, GNorm = 0.9796, lr_0 = 1.6322e-04
Loss = 7.9201e-02, PNorm = 84.5803, GNorm = 0.4658, lr_0 = 1.6311e-04
Loss = 8.7584e-02, PNorm = 84.5847, GNorm = 0.6365, lr_0 = 1.6299e-04
Loss = 8.9247e-02, PNorm = 84.5873, GNorm = 0.5928, lr_0 = 1.6288e-04
Loss = 7.5764e-02, PNorm = 84.5884, GNorm = 0.7354, lr_0 = 1.6277e-04
Loss = 7.9108e-02, PNorm = 84.5897, GNorm = 0.5952, lr_0 = 1.6266e-04
Loss = 8.1397e-02, PNorm = 84.5916, GNorm = 0.4903, lr_0 = 1.6255e-04
Loss = 8.3269e-02, PNorm = 84.5921, GNorm = 0.6295, lr_0 = 1.6244e-04
Loss = 7.7123e-02, PNorm = 84.5961, GNorm = 0.6273, lr_0 = 1.6233e-04
Loss = 7.9303e-02, PNorm = 84.5979, GNorm = 0.7042, lr_0 = 1.6221e-04
Loss = 7.5873e-02, PNorm = 84.5998, GNorm = 0.6229, lr_0 = 1.6210e-04
Loss = 8.1928e-02, PNorm = 84.6043, GNorm = 0.7538, lr_0 = 1.6199e-04
Loss = 7.2626e-02, PNorm = 84.6042, GNorm = 0.5821, lr_0 = 1.6188e-04
Loss = 8.4836e-02, PNorm = 84.6063, GNorm = 0.5878, lr_0 = 1.6177e-04
Loss = 6.9633e-02, PNorm = 84.6097, GNorm = 0.3927, lr_0 = 1.6166e-04
Loss = 7.7822e-02, PNorm = 84.6115, GNorm = 0.4953, lr_0 = 1.6155e-04
Loss = 7.6506e-02, PNorm = 84.6133, GNorm = 0.5365, lr_0 = 1.6144e-04
Loss = 7.3866e-02, PNorm = 84.6167, GNorm = 0.5983, lr_0 = 1.6133e-04
Loss = 9.1229e-02, PNorm = 84.6171, GNorm = 0.7094, lr_0 = 1.6122e-04
Loss = 7.6834e-02, PNorm = 84.6169, GNorm = 0.6814, lr_0 = 1.6111e-04
Loss = 9.3050e-02, PNorm = 84.6213, GNorm = 0.7005, lr_0 = 1.6100e-04
Loss = 7.7759e-02, PNorm = 84.6265, GNorm = 0.7228, lr_0 = 1.6089e-04
Loss = 8.9904e-02, PNorm = 84.6306, GNorm = 0.7280, lr_0 = 1.6078e-04
Loss = 8.0562e-02, PNorm = 84.6311, GNorm = 0.6228, lr_0 = 1.6067e-04
Loss = 7.6618e-02, PNorm = 84.6354, GNorm = 0.6728, lr_0 = 1.6056e-04
Loss = 6.7731e-02, PNorm = 84.6397, GNorm = 0.4621, lr_0 = 1.6045e-04
Loss = 7.7226e-02, PNorm = 84.6426, GNorm = 0.7109, lr_0 = 1.6034e-04
Loss = 8.2898e-02, PNorm = 84.6464, GNorm = 0.7671, lr_0 = 1.6023e-04
Loss = 7.7565e-02, PNorm = 84.6506, GNorm = 0.5704, lr_0 = 1.6012e-04
Loss = 7.9150e-02, PNorm = 84.6534, GNorm = 0.6061, lr_0 = 1.6001e-04
Loss = 7.7896e-02, PNorm = 84.6583, GNorm = 0.7070, lr_0 = 1.5990e-04
Loss = 8.4409e-02, PNorm = 84.6621, GNorm = 0.7025, lr_0 = 1.5979e-04
Loss = 8.6135e-02, PNorm = 84.6649, GNorm = 0.6715, lr_0 = 1.5968e-04
Loss = 8.6861e-02, PNorm = 84.6668, GNorm = 0.4400, lr_0 = 1.5957e-04
Loss = 8.1289e-02, PNorm = 84.6694, GNorm = 0.6823, lr_0 = 1.5946e-04
Loss = 8.7471e-02, PNorm = 84.6708, GNorm = 0.6679, lr_0 = 1.5935e-04
Loss = 8.4898e-02, PNorm = 84.6710, GNorm = 0.5125, lr_0 = 1.5924e-04
Loss = 7.6876e-02, PNorm = 84.6725, GNorm = 0.5662, lr_0 = 1.5913e-04
Loss = 7.9382e-02, PNorm = 84.6742, GNorm = 0.8512, lr_0 = 1.5902e-04
Loss = 8.8662e-02, PNorm = 84.6763, GNorm = 0.7253, lr_0 = 1.5891e-04
Loss = 8.3224e-02, PNorm = 84.6819, GNorm = 0.6913, lr_0 = 1.5880e-04
Loss = 8.9078e-02, PNorm = 84.6852, GNorm = 0.5117, lr_0 = 1.5870e-04
Loss = 7.1707e-02, PNorm = 84.6882, GNorm = 0.6717, lr_0 = 1.5859e-04
Loss = 7.6370e-02, PNorm = 84.6903, GNorm = 0.8251, lr_0 = 1.5848e-04
Loss = 7.7812e-02, PNorm = 84.6935, GNorm = 0.6549, lr_0 = 1.5837e-04
Loss = 8.1266e-02, PNorm = 84.6948, GNorm = 0.5547, lr_0 = 1.5826e-04
Loss = 8.5230e-02, PNorm = 84.6943, GNorm = 0.7553, lr_0 = 1.5815e-04
Loss = 8.6957e-02, PNorm = 84.6959, GNorm = 0.7724, lr_0 = 1.5804e-04
Loss = 8.4909e-02, PNorm = 84.6996, GNorm = 0.5767, lr_0 = 1.5794e-04
Loss = 8.4646e-02, PNorm = 84.7034, GNorm = 0.4715, lr_0 = 1.5783e-04
Loss = 8.6990e-02, PNorm = 84.7087, GNorm = 0.5587, lr_0 = 1.5772e-04
Loss = 7.4264e-02, PNorm = 84.7118, GNorm = 0.8137, lr_0 = 1.5761e-04
Loss = 7.3078e-02, PNorm = 84.7124, GNorm = 0.6285, lr_0 = 1.5750e-04
Loss = 8.8875e-02, PNorm = 84.7134, GNorm = 0.6525, lr_0 = 1.5740e-04
Loss = 7.9707e-02, PNorm = 84.7141, GNorm = 0.6834, lr_0 = 1.5729e-04
Loss = 8.8536e-02, PNorm = 84.7147, GNorm = 0.8175, lr_0 = 1.5718e-04
Loss = 8.3784e-02, PNorm = 84.7159, GNorm = 0.5964, lr_0 = 1.5707e-04
Loss = 7.7227e-02, PNorm = 84.7172, GNorm = 0.6183, lr_0 = 1.5697e-04
Loss = 7.6496e-02, PNorm = 84.7201, GNorm = 0.5067, lr_0 = 1.5686e-04
Loss = 8.5776e-02, PNorm = 84.7228, GNorm = 0.6929, lr_0 = 1.5675e-04
Loss = 8.2841e-02, PNorm = 84.7216, GNorm = 0.5036, lr_0 = 1.5664e-04
Loss = 8.6226e-02, PNorm = 84.7226, GNorm = 0.7318, lr_0 = 1.5654e-04
Loss = 9.0225e-02, PNorm = 84.7248, GNorm = 0.7060, lr_0 = 1.5643e-04
Loss = 8.6768e-02, PNorm = 84.7294, GNorm = 0.7084, lr_0 = 1.5632e-04
Loss = 7.7025e-02, PNorm = 84.7313, GNorm = 0.5371, lr_0 = 1.5621e-04
Loss = 8.0431e-02, PNorm = 84.7333, GNorm = 0.6375, lr_0 = 1.5611e-04
Loss = 8.6437e-02, PNorm = 84.7373, GNorm = 1.0349, lr_0 = 1.5600e-04
Loss = 7.9033e-02, PNorm = 84.7389, GNorm = 0.4951, lr_0 = 1.5589e-04
Loss = 8.5789e-02, PNorm = 84.7394, GNorm = 0.7705, lr_0 = 1.5579e-04
Loss = 8.7695e-02, PNorm = 84.7414, GNorm = 0.6001, lr_0 = 1.5568e-04
Loss = 8.1860e-02, PNorm = 84.7438, GNorm = 0.5898, lr_0 = 1.5557e-04
Loss = 8.0879e-02, PNorm = 84.7479, GNorm = 0.6337, lr_0 = 1.5547e-04
Loss = 8.2158e-02, PNorm = 84.7530, GNorm = 0.5265, lr_0 = 1.5536e-04
Loss = 7.9992e-02, PNorm = 84.7559, GNorm = 0.6272, lr_0 = 1.5525e-04
Loss = 8.0968e-02, PNorm = 84.7592, GNorm = 0.7044, lr_0 = 1.5515e-04
Loss = 8.9666e-02, PNorm = 84.7639, GNorm = 0.6991, lr_0 = 1.5504e-04
Loss = 7.3054e-02, PNorm = 84.7668, GNorm = 0.6879, lr_0 = 1.5493e-04
Loss = 7.9914e-02, PNorm = 84.7689, GNorm = 0.7856, lr_0 = 1.5483e-04
Loss = 8.4680e-02, PNorm = 84.7716, GNorm = 0.3872, lr_0 = 1.5472e-04
Loss = 8.4980e-02, PNorm = 84.7750, GNorm = 0.7424, lr_0 = 1.5462e-04
Loss = 8.5171e-02, PNorm = 84.7771, GNorm = 0.4276, lr_0 = 1.5451e-04
Loss = 9.7075e-02, PNorm = 84.7800, GNorm = 0.7490, lr_0 = 1.5440e-04
Loss = 7.7801e-02, PNorm = 84.7830, GNorm = 0.7070, lr_0 = 1.5430e-04
Loss = 7.5555e-02, PNorm = 84.7877, GNorm = 0.7446, lr_0 = 1.5419e-04
Loss = 7.9991e-02, PNorm = 84.7907, GNorm = 0.6992, lr_0 = 1.5409e-04
Loss = 8.2789e-02, PNorm = 84.7939, GNorm = 0.6361, lr_0 = 1.5398e-04
Loss = 7.1817e-02, PNorm = 84.7961, GNorm = 0.5413, lr_0 = 1.5388e-04
Loss = 9.2853e-02, PNorm = 84.7986, GNorm = 1.0068, lr_0 = 1.5377e-04
Loss = 8.0277e-02, PNorm = 84.7995, GNorm = 0.6427, lr_0 = 1.5367e-04
Loss = 8.5809e-02, PNorm = 84.8017, GNorm = 0.9385, lr_0 = 1.5356e-04
Loss = 8.8733e-02, PNorm = 84.8058, GNorm = 0.8118, lr_0 = 1.5346e-04
Loss = 8.0977e-02, PNorm = 84.8069, GNorm = 0.5440, lr_0 = 1.5335e-04
Loss = 8.4269e-02, PNorm = 84.8107, GNorm = 0.5244, lr_0 = 1.5325e-04
Loss = 7.7208e-02, PNorm = 84.8144, GNorm = 0.4680, lr_0 = 1.5314e-04
Loss = 8.1021e-02, PNorm = 84.8154, GNorm = 0.7815, lr_0 = 1.5304e-04
Loss = 8.5702e-02, PNorm = 84.8169, GNorm = 0.6547, lr_0 = 1.5293e-04
Loss = 7.4384e-02, PNorm = 84.8187, GNorm = 0.7443, lr_0 = 1.5283e-04
Loss = 8.2472e-02, PNorm = 84.8217, GNorm = 0.6238, lr_0 = 1.5272e-04
Loss = 8.6659e-02, PNorm = 84.8241, GNorm = 0.5417, lr_0 = 1.5262e-04
Loss = 8.6412e-02, PNorm = 84.8270, GNorm = 0.8906, lr_0 = 1.5251e-04
Loss = 9.0616e-02, PNorm = 84.8305, GNorm = 0.5704, lr_0 = 1.5241e-04
Loss = 8.8726e-02, PNorm = 84.8332, GNorm = 0.7349, lr_0 = 1.5230e-04
Loss = 8.7012e-02, PNorm = 84.8355, GNorm = 0.9211, lr_0 = 1.5220e-04
Loss = 8.4396e-02, PNorm = 84.8392, GNorm = 0.9495, lr_0 = 1.5209e-04
Loss = 8.5422e-02, PNorm = 84.8407, GNorm = 0.8521, lr_0 = 1.5199e-04
Loss = 8.9989e-02, PNorm = 84.8441, GNorm = 1.0409, lr_0 = 1.5189e-04
Loss = 8.2363e-02, PNorm = 84.8475, GNorm = 0.8180, lr_0 = 1.5178e-04
Loss = 7.9198e-02, PNorm = 84.8486, GNorm = 0.7693, lr_0 = 1.5168e-04
Loss = 8.4118e-02, PNorm = 84.8496, GNorm = 0.4872, lr_0 = 1.5157e-04
Loss = 7.9403e-02, PNorm = 84.8525, GNorm = 0.7521, lr_0 = 1.5147e-04
Loss = 8.3558e-02, PNorm = 84.8556, GNorm = 0.6680, lr_0 = 1.5137e-04
Loss = 8.5317e-02, PNorm = 84.8573, GNorm = 0.6394, lr_0 = 1.5126e-04
Loss = 9.4301e-02, PNorm = 84.8601, GNorm = 0.6904, lr_0 = 1.5116e-04
Loss = 7.6733e-02, PNorm = 84.8617, GNorm = 0.8441, lr_0 = 1.5106e-04
Loss = 7.7710e-02, PNorm = 84.8612, GNorm = 0.6478, lr_0 = 1.5095e-04
Loss = 8.4849e-02, PNorm = 84.8647, GNorm = 0.6044, lr_0 = 1.5085e-04
Validation mae = 0.231155
Epoch 25
Loss = 6.9352e-02, PNorm = 84.8679, GNorm = 0.6855, lr_0 = 1.5075e-04
Loss = 6.9448e-02, PNorm = 84.8737, GNorm = 0.5109, lr_0 = 1.5064e-04
Loss = 7.3562e-02, PNorm = 84.8774, GNorm = 0.7329, lr_0 = 1.5054e-04
Loss = 7.9514e-02, PNorm = 84.8815, GNorm = 0.7982, lr_0 = 1.5044e-04
Loss = 7.8502e-02, PNorm = 84.8844, GNorm = 0.6734, lr_0 = 1.5033e-04
Loss = 7.6407e-02, PNorm = 84.8883, GNorm = 0.5173, lr_0 = 1.5023e-04
Loss = 8.2613e-02, PNorm = 84.8885, GNorm = 0.7129, lr_0 = 1.5013e-04
Loss = 8.4450e-02, PNorm = 84.8871, GNorm = 0.5297, lr_0 = 1.5002e-04
Loss = 7.7431e-02, PNorm = 84.8903, GNorm = 0.6014, lr_0 = 1.4992e-04
Loss = 8.0028e-02, PNorm = 84.8932, GNorm = 0.6306, lr_0 = 1.4982e-04
Loss = 7.2243e-02, PNorm = 84.8945, GNorm = 0.4589, lr_0 = 1.4972e-04
Loss = 7.4326e-02, PNorm = 84.8977, GNorm = 0.6579, lr_0 = 1.4961e-04
Loss = 7.5919e-02, PNorm = 84.9013, GNorm = 0.6984, lr_0 = 1.4951e-04
Loss = 7.6967e-02, PNorm = 84.9028, GNorm = 0.5997, lr_0 = 1.4941e-04
Loss = 7.7537e-02, PNorm = 84.9037, GNorm = 0.7112, lr_0 = 1.4931e-04
Loss = 7.7881e-02, PNorm = 84.9052, GNorm = 0.6853, lr_0 = 1.4920e-04
Loss = 7.7435e-02, PNorm = 84.9046, GNorm = 0.8953, lr_0 = 1.4910e-04
Loss = 8.9057e-02, PNorm = 84.9083, GNorm = 0.5883, lr_0 = 1.4900e-04
Loss = 8.4610e-02, PNorm = 84.9119, GNorm = 0.8312, lr_0 = 1.4890e-04
Loss = 6.9960e-02, PNorm = 84.9142, GNorm = 0.4413, lr_0 = 1.4880e-04
Loss = 8.7903e-02, PNorm = 84.9168, GNorm = 0.5075, lr_0 = 1.4869e-04
Loss = 8.7504e-02, PNorm = 84.9195, GNorm = 0.5624, lr_0 = 1.4859e-04
Loss = 8.3858e-02, PNorm = 84.9201, GNorm = 0.6345, lr_0 = 1.4849e-04
Loss = 8.1266e-02, PNorm = 84.9217, GNorm = 0.5595, lr_0 = 1.4839e-04
Loss = 7.9026e-02, PNorm = 84.9233, GNorm = 0.5837, lr_0 = 1.4829e-04
Loss = 9.0526e-02, PNorm = 84.9262, GNorm = 0.6976, lr_0 = 1.4818e-04
Loss = 7.2992e-02, PNorm = 84.9284, GNorm = 0.5187, lr_0 = 1.4808e-04
Loss = 8.1508e-02, PNorm = 84.9299, GNorm = 0.5374, lr_0 = 1.4798e-04
Loss = 7.8690e-02, PNorm = 84.9324, GNorm = 0.5895, lr_0 = 1.4788e-04
Loss = 6.9503e-02, PNorm = 84.9328, GNorm = 0.5773, lr_0 = 1.4778e-04
Loss = 7.5085e-02, PNorm = 84.9341, GNorm = 0.8807, lr_0 = 1.4768e-04
Loss = 7.6345e-02, PNorm = 84.9380, GNorm = 0.6118, lr_0 = 1.4758e-04
Loss = 7.5180e-02, PNorm = 84.9434, GNorm = 0.6709, lr_0 = 1.4748e-04
Loss = 7.8023e-02, PNorm = 84.9467, GNorm = 0.7550, lr_0 = 1.4737e-04
Loss = 7.3496e-02, PNorm = 84.9477, GNorm = 0.7471, lr_0 = 1.4727e-04
Loss = 6.9252e-02, PNorm = 84.9488, GNorm = 0.6790, lr_0 = 1.4717e-04
Loss = 8.0392e-02, PNorm = 84.9515, GNorm = 0.4763, lr_0 = 1.4707e-04
Loss = 7.2249e-02, PNorm = 84.9545, GNorm = 0.5884, lr_0 = 1.4697e-04
Loss = 8.3078e-02, PNorm = 84.9547, GNorm = 0.7273, lr_0 = 1.4687e-04
Loss = 8.2207e-02, PNorm = 84.9557, GNorm = 0.4291, lr_0 = 1.4677e-04
Loss = 7.7803e-02, PNorm = 84.9580, GNorm = 0.5618, lr_0 = 1.4667e-04
Loss = 7.5934e-02, PNorm = 84.9595, GNorm = 0.5856, lr_0 = 1.4657e-04
Loss = 7.9830e-02, PNorm = 84.9617, GNorm = 0.5737, lr_0 = 1.4647e-04
Loss = 1.0087e-01, PNorm = 84.9655, GNorm = 0.5262, lr_0 = 1.4637e-04
Loss = 8.1497e-02, PNorm = 84.9673, GNorm = 0.6133, lr_0 = 1.4627e-04
Loss = 8.3647e-02, PNorm = 84.9694, GNorm = 0.5394, lr_0 = 1.4617e-04
Loss = 7.6153e-02, PNorm = 84.9734, GNorm = 0.6939, lr_0 = 1.4607e-04
Loss = 7.3293e-02, PNorm = 84.9760, GNorm = 0.6968, lr_0 = 1.4597e-04
Loss = 8.1648e-02, PNorm = 84.9757, GNorm = 0.6858, lr_0 = 1.4587e-04
Loss = 7.8646e-02, PNorm = 84.9791, GNorm = 0.4859, lr_0 = 1.4577e-04
Loss = 7.9095e-02, PNorm = 84.9839, GNorm = 0.7919, lr_0 = 1.4567e-04
Loss = 9.5406e-02, PNorm = 84.9871, GNorm = 0.5589, lr_0 = 1.4557e-04
Loss = 8.5276e-02, PNorm = 84.9893, GNorm = 0.5068, lr_0 = 1.4547e-04
Loss = 6.7723e-02, PNorm = 84.9910, GNorm = 0.6610, lr_0 = 1.4537e-04
Loss = 7.6263e-02, PNorm = 84.9923, GNorm = 0.5770, lr_0 = 1.4527e-04
Loss = 6.8314e-02, PNorm = 84.9950, GNorm = 0.5684, lr_0 = 1.4517e-04
Loss = 7.9349e-02, PNorm = 84.9957, GNorm = 0.6054, lr_0 = 1.4507e-04
Loss = 6.6703e-02, PNorm = 84.9974, GNorm = 0.5954, lr_0 = 1.4497e-04
Loss = 7.1818e-02, PNorm = 85.0000, GNorm = 0.5776, lr_0 = 1.4487e-04
Loss = 7.2222e-02, PNorm = 85.0014, GNorm = 0.5365, lr_0 = 1.4477e-04
Loss = 8.7783e-02, PNorm = 85.0017, GNorm = 0.6597, lr_0 = 1.4467e-04
Loss = 8.3918e-02, PNorm = 85.0055, GNorm = 0.5981, lr_0 = 1.4457e-04
Loss = 7.5025e-02, PNorm = 85.0078, GNorm = 0.5481, lr_0 = 1.4447e-04
Loss = 6.7788e-02, PNorm = 85.0084, GNorm = 0.5024, lr_0 = 1.4438e-04
Loss = 7.3291e-02, PNorm = 85.0109, GNorm = 0.6734, lr_0 = 1.4428e-04
Loss = 7.4101e-02, PNorm = 85.0114, GNorm = 0.5813, lr_0 = 1.4418e-04
Loss = 8.3410e-02, PNorm = 85.0124, GNorm = 0.5339, lr_0 = 1.4408e-04
Loss = 8.9605e-02, PNorm = 85.0166, GNorm = 0.6449, lr_0 = 1.4398e-04
Loss = 8.0653e-02, PNorm = 85.0195, GNorm = 0.8890, lr_0 = 1.4388e-04
Loss = 7.8671e-02, PNorm = 85.0223, GNorm = 0.6112, lr_0 = 1.4378e-04
Loss = 8.1888e-02, PNorm = 85.0257, GNorm = 0.7049, lr_0 = 1.4368e-04
Loss = 8.8906e-02, PNorm = 85.0290, GNorm = 0.5065, lr_0 = 1.4359e-04
Loss = 7.6738e-02, PNorm = 85.0317, GNorm = 0.5470, lr_0 = 1.4349e-04
Loss = 7.5705e-02, PNorm = 85.0332, GNorm = 0.5918, lr_0 = 1.4339e-04
Loss = 9.1035e-02, PNorm = 85.0326, GNorm = 0.7590, lr_0 = 1.4329e-04
Loss = 9.3016e-02, PNorm = 85.0367, GNorm = 0.6516, lr_0 = 1.4319e-04
Loss = 9.9247e-02, PNorm = 85.0391, GNorm = 0.5027, lr_0 = 1.4310e-04
Loss = 8.1117e-02, PNorm = 85.0394, GNorm = 0.7602, lr_0 = 1.4300e-04
Loss = 6.9920e-02, PNorm = 85.0421, GNorm = 0.6285, lr_0 = 1.4290e-04
Loss = 7.8270e-02, PNorm = 85.0432, GNorm = 0.5564, lr_0 = 1.4280e-04
Loss = 9.1158e-02, PNorm = 85.0459, GNorm = 0.7663, lr_0 = 1.4270e-04
Loss = 7.7832e-02, PNorm = 85.0475, GNorm = 0.6749, lr_0 = 1.4261e-04
Loss = 7.4456e-02, PNorm = 85.0506, GNorm = 0.7125, lr_0 = 1.4251e-04
Loss = 9.0251e-02, PNorm = 85.0558, GNorm = 0.6299, lr_0 = 1.4241e-04
Loss = 9.2460e-02, PNorm = 85.0595, GNorm = 0.6827, lr_0 = 1.4231e-04
Loss = 7.9030e-02, PNorm = 85.0618, GNorm = 0.8482, lr_0 = 1.4222e-04
Loss = 8.6225e-02, PNorm = 85.0641, GNorm = 0.5496, lr_0 = 1.4212e-04
Loss = 6.9200e-02, PNorm = 85.0674, GNorm = 0.5503, lr_0 = 1.4202e-04
Loss = 8.2075e-02, PNorm = 85.0708, GNorm = 0.5961, lr_0 = 1.4192e-04
Loss = 8.6410e-02, PNorm = 85.0731, GNorm = 0.9860, lr_0 = 1.4183e-04
Loss = 8.1616e-02, PNorm = 85.0747, GNorm = 0.8059, lr_0 = 1.4173e-04
Loss = 8.3265e-02, PNorm = 85.0748, GNorm = 0.6320, lr_0 = 1.4163e-04
Loss = 8.8600e-02, PNorm = 85.0787, GNorm = 0.5201, lr_0 = 1.4153e-04
Loss = 8.9981e-02, PNorm = 85.0821, GNorm = 0.5992, lr_0 = 1.4144e-04
Loss = 8.1412e-02, PNorm = 85.0827, GNorm = 1.0511, lr_0 = 1.4134e-04
Loss = 7.6405e-02, PNorm = 85.0858, GNorm = 0.5140, lr_0 = 1.4124e-04
Loss = 8.2826e-02, PNorm = 85.0875, GNorm = 0.5491, lr_0 = 1.4115e-04
Loss = 8.5896e-02, PNorm = 85.0901, GNorm = 0.8054, lr_0 = 1.4105e-04
Loss = 7.9356e-02, PNorm = 85.0915, GNorm = 0.5192, lr_0 = 1.4095e-04
Loss = 8.5027e-02, PNorm = 85.0935, GNorm = 0.6570, lr_0 = 1.4086e-04
Loss = 9.1422e-02, PNorm = 85.0965, GNorm = 0.7782, lr_0 = 1.4076e-04
Loss = 8.4634e-02, PNorm = 85.0999, GNorm = 0.6137, lr_0 = 1.4066e-04
Loss = 8.1882e-02, PNorm = 85.1030, GNorm = 0.5401, lr_0 = 1.4057e-04
Loss = 9.0734e-02, PNorm = 85.1070, GNorm = 0.5949, lr_0 = 1.4047e-04
Loss = 8.0305e-02, PNorm = 85.1084, GNorm = 0.6039, lr_0 = 1.4038e-04
Loss = 8.5796e-02, PNorm = 85.1103, GNorm = 0.7957, lr_0 = 1.4028e-04
Loss = 8.0989e-02, PNorm = 85.1127, GNorm = 0.7792, lr_0 = 1.4018e-04
Loss = 8.2664e-02, PNorm = 85.1148, GNorm = 0.8489, lr_0 = 1.4009e-04
Loss = 7.5704e-02, PNorm = 85.1158, GNorm = 0.6329, lr_0 = 1.3999e-04
Loss = 7.3629e-02, PNorm = 85.1179, GNorm = 0.5329, lr_0 = 1.3990e-04
Loss = 7.9987e-02, PNorm = 85.1192, GNorm = 0.6542, lr_0 = 1.3980e-04
Loss = 8.1716e-02, PNorm = 85.1225, GNorm = 1.0248, lr_0 = 1.3970e-04
Loss = 7.9020e-02, PNorm = 85.1247, GNorm = 0.7115, lr_0 = 1.3961e-04
Loss = 8.7901e-02, PNorm = 85.1268, GNorm = 0.7732, lr_0 = 1.3951e-04
Loss = 8.5433e-02, PNorm = 85.1274, GNorm = 0.8758, lr_0 = 1.3942e-04
Loss = 8.3461e-02, PNorm = 85.1279, GNorm = 0.5378, lr_0 = 1.3932e-04
Loss = 7.5185e-02, PNorm = 85.1300, GNorm = 0.5040, lr_0 = 1.3923e-04
Loss = 8.2143e-02, PNorm = 85.1339, GNorm = 0.6152, lr_0 = 1.3913e-04
Loss = 8.1341e-02, PNorm = 85.1365, GNorm = 0.7235, lr_0 = 1.3904e-04
Loss = 8.5007e-02, PNorm = 85.1407, GNorm = 0.7639, lr_0 = 1.3894e-04
Validation mae = 0.228029
Epoch 26
Loss = 8.0757e-02, PNorm = 85.1446, GNorm = 0.7063, lr_0 = 1.3884e-04
Loss = 7.6439e-02, PNorm = 85.1475, GNorm = 0.5702, lr_0 = 1.3875e-04
Loss = 6.5951e-02, PNorm = 85.1522, GNorm = 0.5318, lr_0 = 1.3865e-04
Loss = 6.8291e-02, PNorm = 85.1532, GNorm = 0.5038, lr_0 = 1.3856e-04
Loss = 7.9478e-02, PNorm = 85.1544, GNorm = 0.6469, lr_0 = 1.3846e-04
Loss = 8.3258e-02, PNorm = 85.1565, GNorm = 0.6050, lr_0 = 1.3837e-04
Loss = 8.7707e-02, PNorm = 85.1594, GNorm = 0.6672, lr_0 = 1.3828e-04
Loss = 7.8913e-02, PNorm = 85.1618, GNorm = 0.7109, lr_0 = 1.3818e-04
Loss = 6.8296e-02, PNorm = 85.1625, GNorm = 0.7404, lr_0 = 1.3809e-04
Loss = 7.5336e-02, PNorm = 85.1639, GNorm = 0.6646, lr_0 = 1.3799e-04
Loss = 6.9147e-02, PNorm = 85.1649, GNorm = 0.5648, lr_0 = 1.3790e-04
Loss = 6.8888e-02, PNorm = 85.1667, GNorm = 0.6398, lr_0 = 1.3780e-04
Loss = 8.0123e-02, PNorm = 85.1694, GNorm = 0.4305, lr_0 = 1.3771e-04
Loss = 7.8711e-02, PNorm = 85.1708, GNorm = 0.5928, lr_0 = 1.3761e-04
Loss = 6.9028e-02, PNorm = 85.1737, GNorm = 0.6362, lr_0 = 1.3752e-04
Loss = 7.3630e-02, PNorm = 85.1783, GNorm = 0.5233, lr_0 = 1.3742e-04
Loss = 7.5400e-02, PNorm = 85.1816, GNorm = 0.6456, lr_0 = 1.3733e-04
Loss = 7.1790e-02, PNorm = 85.1805, GNorm = 0.4632, lr_0 = 1.3724e-04
Loss = 8.4458e-02, PNorm = 85.1825, GNorm = 0.7014, lr_0 = 1.3714e-04
Loss = 7.1067e-02, PNorm = 85.1878, GNorm = 0.5708, lr_0 = 1.3705e-04
Loss = 8.5642e-02, PNorm = 85.1891, GNorm = 0.5767, lr_0 = 1.3695e-04
Loss = 7.7275e-02, PNorm = 85.1917, GNorm = 0.6168, lr_0 = 1.3686e-04
Loss = 7.1260e-02, PNorm = 85.1939, GNorm = 0.4597, lr_0 = 1.3677e-04
Loss = 7.9926e-02, PNorm = 85.1972, GNorm = 0.6241, lr_0 = 1.3667e-04
Loss = 7.1050e-02, PNorm = 85.1984, GNorm = 0.5910, lr_0 = 1.3658e-04
Loss = 7.6373e-02, PNorm = 85.2015, GNorm = 0.8228, lr_0 = 1.3649e-04
Loss = 8.4628e-02, PNorm = 85.2033, GNorm = 0.5998, lr_0 = 1.3639e-04
Loss = 7.0334e-02, PNorm = 85.2049, GNorm = 0.5619, lr_0 = 1.3630e-04
Loss = 8.3551e-02, PNorm = 85.2055, GNorm = 0.4892, lr_0 = 1.3621e-04
Loss = 6.6542e-02, PNorm = 85.2061, GNorm = 0.5978, lr_0 = 1.3611e-04
Loss = 7.4248e-02, PNorm = 85.2083, GNorm = 0.5727, lr_0 = 1.3602e-04
Loss = 7.8572e-02, PNorm = 85.2108, GNorm = 0.7535, lr_0 = 1.3593e-04
Loss = 8.3178e-02, PNorm = 85.2149, GNorm = 0.7443, lr_0 = 1.3583e-04
Loss = 8.4382e-02, PNorm = 85.2184, GNorm = 0.5077, lr_0 = 1.3574e-04
Loss = 7.4476e-02, PNorm = 85.2197, GNorm = 0.8235, lr_0 = 1.3565e-04
Loss = 7.4376e-02, PNorm = 85.2230, GNorm = 0.6167, lr_0 = 1.3555e-04
Loss = 7.8261e-02, PNorm = 85.2246, GNorm = 0.7134, lr_0 = 1.3546e-04
Loss = 8.3710e-02, PNorm = 85.2287, GNorm = 0.7636, lr_0 = 1.3537e-04
Loss = 7.0215e-02, PNorm = 85.2295, GNorm = 0.4754, lr_0 = 1.3528e-04
Loss = 8.3664e-02, PNorm = 85.2309, GNorm = 0.5635, lr_0 = 1.3518e-04
Loss = 8.3704e-02, PNorm = 85.2335, GNorm = 0.5732, lr_0 = 1.3509e-04
Loss = 8.3373e-02, PNorm = 85.2355, GNorm = 0.6208, lr_0 = 1.3500e-04
Loss = 6.6792e-02, PNorm = 85.2383, GNorm = 0.5372, lr_0 = 1.3491e-04
Loss = 8.4710e-02, PNorm = 85.2409, GNorm = 0.6151, lr_0 = 1.3481e-04
Loss = 6.7244e-02, PNorm = 85.2439, GNorm = 0.4744, lr_0 = 1.3472e-04
Loss = 8.4500e-02, PNorm = 85.2462, GNorm = 0.9014, lr_0 = 1.3463e-04
Loss = 8.0534e-02, PNorm = 85.2476, GNorm = 0.9332, lr_0 = 1.3454e-04
Loss = 7.4445e-02, PNorm = 85.2504, GNorm = 0.6489, lr_0 = 1.3444e-04
Loss = 8.2946e-02, PNorm = 85.2534, GNorm = 0.6766, lr_0 = 1.3435e-04
Loss = 9.0971e-02, PNorm = 85.2572, GNorm = 0.6547, lr_0 = 1.3426e-04
Loss = 7.4057e-02, PNorm = 85.2629, GNorm = 0.7027, lr_0 = 1.3417e-04
Loss = 7.7649e-02, PNorm = 85.2663, GNorm = 0.5181, lr_0 = 1.3408e-04
Loss = 7.7498e-02, PNorm = 85.2674, GNorm = 0.6108, lr_0 = 1.3398e-04
Loss = 6.8845e-02, PNorm = 85.2696, GNorm = 0.6652, lr_0 = 1.3389e-04
Loss = 7.2015e-02, PNorm = 85.2704, GNorm = 0.6090, lr_0 = 1.3380e-04
Loss = 9.5799e-02, PNorm = 85.2724, GNorm = 0.7909, lr_0 = 1.3371e-04
Loss = 7.3774e-02, PNorm = 85.2748, GNorm = 0.5331, lr_0 = 1.3362e-04
Loss = 8.3162e-02, PNorm = 85.2768, GNorm = 0.8525, lr_0 = 1.3353e-04
Loss = 7.6390e-02, PNorm = 85.2788, GNorm = 0.9025, lr_0 = 1.3343e-04
Loss = 7.8014e-02, PNorm = 85.2807, GNorm = 0.6399, lr_0 = 1.3334e-04
Loss = 7.5432e-02, PNorm = 85.2805, GNorm = 0.6090, lr_0 = 1.3325e-04
Loss = 7.8705e-02, PNorm = 85.2796, GNorm = 0.7418, lr_0 = 1.3316e-04
Loss = 8.0308e-02, PNorm = 85.2815, GNorm = 0.6674, lr_0 = 1.3307e-04
Loss = 7.3582e-02, PNorm = 85.2828, GNorm = 0.6785, lr_0 = 1.3298e-04
Loss = 7.7384e-02, PNorm = 85.2856, GNorm = 0.6362, lr_0 = 1.3289e-04
Loss = 9.1829e-02, PNorm = 85.2875, GNorm = 0.8897, lr_0 = 1.3280e-04
Loss = 8.8874e-02, PNorm = 85.2905, GNorm = 0.9430, lr_0 = 1.3270e-04
Loss = 6.9381e-02, PNorm = 85.2915, GNorm = 0.5710, lr_0 = 1.3261e-04
Loss = 7.7152e-02, PNorm = 85.2944, GNorm = 0.6251, lr_0 = 1.3252e-04
Loss = 7.3411e-02, PNorm = 85.2970, GNorm = 0.5304, lr_0 = 1.3243e-04
Loss = 7.1104e-02, PNorm = 85.2989, GNorm = 0.5570, lr_0 = 1.3234e-04
Loss = 6.6653e-02, PNorm = 85.3004, GNorm = 0.5851, lr_0 = 1.3225e-04
Loss = 7.5273e-02, PNorm = 85.3046, GNorm = 0.6571, lr_0 = 1.3216e-04
Loss = 8.2408e-02, PNorm = 85.3076, GNorm = 0.5990, lr_0 = 1.3207e-04
Loss = 8.3115e-02, PNorm = 85.3098, GNorm = 0.9301, lr_0 = 1.3198e-04
Loss = 7.6426e-02, PNorm = 85.3130, GNorm = 0.5961, lr_0 = 1.3189e-04
Loss = 7.9694e-02, PNorm = 85.3154, GNorm = 0.5186, lr_0 = 1.3180e-04
Loss = 8.4071e-02, PNorm = 85.3167, GNorm = 0.6581, lr_0 = 1.3171e-04
Loss = 8.7410e-02, PNorm = 85.3174, GNorm = 0.6589, lr_0 = 1.3162e-04
Loss = 7.4842e-02, PNorm = 85.3188, GNorm = 0.5366, lr_0 = 1.3153e-04
Loss = 8.0271e-02, PNorm = 85.3179, GNorm = 0.7039, lr_0 = 1.3144e-04
Loss = 7.2070e-02, PNorm = 85.3192, GNorm = 0.7098, lr_0 = 1.3135e-04
Loss = 7.5432e-02, PNorm = 85.3200, GNorm = 0.7966, lr_0 = 1.3126e-04
Loss = 8.4871e-02, PNorm = 85.3221, GNorm = 0.5058, lr_0 = 1.3117e-04
Loss = 8.4339e-02, PNorm = 85.3240, GNorm = 0.7402, lr_0 = 1.3108e-04
Loss = 8.0364e-02, PNorm = 85.3270, GNorm = 0.5063, lr_0 = 1.3099e-04
Loss = 7.7592e-02, PNorm = 85.3291, GNorm = 0.6245, lr_0 = 1.3090e-04
Loss = 8.3699e-02, PNorm = 85.3304, GNorm = 0.6458, lr_0 = 1.3081e-04
Loss = 7.3590e-02, PNorm = 85.3334, GNorm = 0.7168, lr_0 = 1.3072e-04
Loss = 8.1182e-02, PNorm = 85.3374, GNorm = 0.6735, lr_0 = 1.3063e-04
Loss = 7.4205e-02, PNorm = 85.3400, GNorm = 0.5751, lr_0 = 1.3054e-04
Loss = 8.9898e-02, PNorm = 85.3407, GNorm = 0.6874, lr_0 = 1.3045e-04
Loss = 7.8311e-02, PNorm = 85.3407, GNorm = 0.5847, lr_0 = 1.3036e-04
Loss = 8.3804e-02, PNorm = 85.3399, GNorm = 0.7267, lr_0 = 1.3027e-04
Loss = 8.3030e-02, PNorm = 85.3400, GNorm = 0.6483, lr_0 = 1.3018e-04
Loss = 7.8512e-02, PNorm = 85.3385, GNorm = 0.6444, lr_0 = 1.3009e-04
Loss = 7.1499e-02, PNorm = 85.3392, GNorm = 0.4811, lr_0 = 1.3000e-04
Loss = 8.2634e-02, PNorm = 85.3411, GNorm = 0.4558, lr_0 = 1.2992e-04
Loss = 7.7710e-02, PNorm = 85.3431, GNorm = 0.6235, lr_0 = 1.2983e-04
Loss = 6.7879e-02, PNorm = 85.3448, GNorm = 0.5937, lr_0 = 1.2974e-04
Loss = 8.8124e-02, PNorm = 85.3460, GNorm = 0.5819, lr_0 = 1.2965e-04
Loss = 8.5488e-02, PNorm = 85.3487, GNorm = 0.8354, lr_0 = 1.2956e-04
Loss = 7.3825e-02, PNorm = 85.3495, GNorm = 0.6562, lr_0 = 1.2947e-04
Loss = 8.2088e-02, PNorm = 85.3510, GNorm = 0.5731, lr_0 = 1.2938e-04
Loss = 8.2746e-02, PNorm = 85.3545, GNorm = 0.6228, lr_0 = 1.2929e-04
Loss = 9.2360e-02, PNorm = 85.3561, GNorm = 0.6715, lr_0 = 1.2921e-04
Loss = 6.8265e-02, PNorm = 85.3579, GNorm = 0.5042, lr_0 = 1.2912e-04
Loss = 8.1669e-02, PNorm = 85.3601, GNorm = 0.7074, lr_0 = 1.2903e-04
Loss = 6.3895e-02, PNorm = 85.3626, GNorm = 0.6010, lr_0 = 1.2894e-04
Loss = 8.1368e-02, PNorm = 85.3666, GNorm = 0.6336, lr_0 = 1.2885e-04
Loss = 7.6751e-02, PNorm = 85.3689, GNorm = 0.5762, lr_0 = 1.2876e-04
Loss = 8.4914e-02, PNorm = 85.3717, GNorm = 0.5847, lr_0 = 1.2867e-04
Loss = 7.8291e-02, PNorm = 85.3748, GNorm = 0.5591, lr_0 = 1.2859e-04
Loss = 8.1889e-02, PNorm = 85.3772, GNorm = 0.6124, lr_0 = 1.2850e-04
Loss = 7.9893e-02, PNorm = 85.3815, GNorm = 0.6306, lr_0 = 1.2841e-04
Loss = 8.7544e-02, PNorm = 85.3826, GNorm = 0.6393, lr_0 = 1.2832e-04
Loss = 7.7345e-02, PNorm = 85.3829, GNorm = 0.5543, lr_0 = 1.2823e-04
Loss = 8.3987e-02, PNorm = 85.3848, GNorm = 0.5354, lr_0 = 1.2815e-04
Loss = 6.7734e-02, PNorm = 85.3854, GNorm = 0.6091, lr_0 = 1.2806e-04
Loss = 8.8921e-02, PNorm = 85.3874, GNorm = 0.5519, lr_0 = 1.2797e-04
Validation mae = 0.228017
Epoch 27
Loss = 7.5566e-02, PNorm = 85.3900, GNorm = 0.5735, lr_0 = 1.2788e-04
Loss = 8.0667e-02, PNorm = 85.3921, GNorm = 0.7541, lr_0 = 1.2780e-04
Loss = 7.6238e-02, PNorm = 85.3941, GNorm = 0.6860, lr_0 = 1.2771e-04
Loss = 8.0618e-02, PNorm = 85.3949, GNorm = 0.8116, lr_0 = 1.2762e-04
Loss = 6.8364e-02, PNorm = 85.3967, GNorm = 0.5923, lr_0 = 1.2753e-04
Loss = 7.5536e-02, PNorm = 85.3999, GNorm = 0.5263, lr_0 = 1.2745e-04
Loss = 7.6345e-02, PNorm = 85.4019, GNorm = 0.7282, lr_0 = 1.2736e-04
Loss = 7.9833e-02, PNorm = 85.4042, GNorm = 0.8054, lr_0 = 1.2727e-04
Loss = 7.7317e-02, PNorm = 85.4070, GNorm = 0.4574, lr_0 = 1.2718e-04
Loss = 7.8764e-02, PNorm = 85.4080, GNorm = 0.6178, lr_0 = 1.2710e-04
Loss = 7.0225e-02, PNorm = 85.4104, GNorm = 0.5129, lr_0 = 1.2701e-04
Loss = 8.6644e-02, PNorm = 85.4127, GNorm = 0.6932, lr_0 = 1.2692e-04
Loss = 6.9450e-02, PNorm = 85.4160, GNorm = 0.5183, lr_0 = 1.2684e-04
Loss = 7.7423e-02, PNorm = 85.4171, GNorm = 0.5284, lr_0 = 1.2675e-04
Loss = 7.9387e-02, PNorm = 85.4200, GNorm = 0.6472, lr_0 = 1.2666e-04
Loss = 8.0284e-02, PNorm = 85.4227, GNorm = 0.6301, lr_0 = 1.2658e-04
Loss = 7.5885e-02, PNorm = 85.4237, GNorm = 0.7430, lr_0 = 1.2649e-04
Loss = 8.8820e-02, PNorm = 85.4246, GNorm = 0.7381, lr_0 = 1.2640e-04
Loss = 7.2664e-02, PNorm = 85.4262, GNorm = 0.6669, lr_0 = 1.2632e-04
Loss = 7.8795e-02, PNorm = 85.4285, GNorm = 0.5480, lr_0 = 1.2623e-04
Loss = 7.7471e-02, PNorm = 85.4319, GNorm = 0.5381, lr_0 = 1.2614e-04
Loss = 7.5709e-02, PNorm = 85.4321, GNorm = 0.7339, lr_0 = 1.2606e-04
Loss = 7.3773e-02, PNorm = 85.4324, GNorm = 0.4758, lr_0 = 1.2597e-04
Loss = 7.0726e-02, PNorm = 85.4343, GNorm = 0.5970, lr_0 = 1.2588e-04
Loss = 6.6550e-02, PNorm = 85.4357, GNorm = 0.5895, lr_0 = 1.2580e-04
Loss = 7.1966e-02, PNorm = 85.4384, GNorm = 0.6719, lr_0 = 1.2571e-04
Loss = 7.2892e-02, PNorm = 85.4409, GNorm = 0.5886, lr_0 = 1.2563e-04
Loss = 6.8275e-02, PNorm = 85.4436, GNorm = 0.6045, lr_0 = 1.2554e-04
Loss = 7.4291e-02, PNorm = 85.4439, GNorm = 0.5314, lr_0 = 1.2545e-04
Loss = 6.6952e-02, PNorm = 85.4439, GNorm = 0.5618, lr_0 = 1.2537e-04
Loss = 7.7638e-02, PNorm = 85.4451, GNorm = 0.6119, lr_0 = 1.2528e-04
Loss = 7.7337e-02, PNorm = 85.4484, GNorm = 0.5977, lr_0 = 1.2520e-04
Loss = 7.9098e-02, PNorm = 85.4513, GNorm = 0.7359, lr_0 = 1.2511e-04
Loss = 7.7700e-02, PNorm = 85.4538, GNorm = 0.5236, lr_0 = 1.2502e-04
Loss = 7.7040e-02, PNorm = 85.4559, GNorm = 0.5901, lr_0 = 1.2494e-04
Loss = 7.4093e-02, PNorm = 85.4567, GNorm = 0.4878, lr_0 = 1.2485e-04
Loss = 7.5312e-02, PNorm = 85.4603, GNorm = 0.6079, lr_0 = 1.2477e-04
Loss = 7.4525e-02, PNorm = 85.4610, GNorm = 0.7094, lr_0 = 1.2468e-04
Loss = 6.8631e-02, PNorm = 85.4620, GNorm = 0.5293, lr_0 = 1.2460e-04
Loss = 7.3959e-02, PNorm = 85.4647, GNorm = 0.5401, lr_0 = 1.2451e-04
Loss = 7.6272e-02, PNorm = 85.4676, GNorm = 0.9588, lr_0 = 1.2443e-04
Loss = 6.8700e-02, PNorm = 85.4692, GNorm = 0.5565, lr_0 = 1.2434e-04
Loss = 8.2825e-02, PNorm = 85.4701, GNorm = 0.8497, lr_0 = 1.2426e-04
Loss = 7.8245e-02, PNorm = 85.4711, GNorm = 0.6958, lr_0 = 1.2417e-04
Loss = 8.3993e-02, PNorm = 85.4755, GNorm = 0.7590, lr_0 = 1.2409e-04
Loss = 8.1969e-02, PNorm = 85.4771, GNorm = 0.7068, lr_0 = 1.2400e-04
Loss = 7.5717e-02, PNorm = 85.4781, GNorm = 0.5267, lr_0 = 1.2392e-04
Loss = 7.4149e-02, PNorm = 85.4787, GNorm = 0.6155, lr_0 = 1.2383e-04
Loss = 7.1891e-02, PNorm = 85.4802, GNorm = 0.6115, lr_0 = 1.2375e-04
Loss = 7.8780e-02, PNorm = 85.4829, GNorm = 0.5083, lr_0 = 1.2366e-04
Loss = 6.3868e-02, PNorm = 85.4840, GNorm = 0.5238, lr_0 = 1.2358e-04
Loss = 8.6391e-02, PNorm = 85.4857, GNorm = 0.5373, lr_0 = 1.2349e-04
Loss = 8.3723e-02, PNorm = 85.4886, GNorm = 0.6417, lr_0 = 1.2341e-04
Loss = 7.6294e-02, PNorm = 85.4911, GNorm = 0.5863, lr_0 = 1.2332e-04
Loss = 7.9943e-02, PNorm = 85.4933, GNorm = 0.6652, lr_0 = 1.2324e-04
Loss = 8.8171e-02, PNorm = 85.4947, GNorm = 0.6859, lr_0 = 1.2315e-04
Loss = 8.5556e-02, PNorm = 85.4957, GNorm = 0.5941, lr_0 = 1.2307e-04
Loss = 7.4549e-02, PNorm = 85.4975, GNorm = 0.7271, lr_0 = 1.2298e-04
Loss = 7.4768e-02, PNorm = 85.4992, GNorm = 0.8034, lr_0 = 1.2290e-04
Loss = 7.4987e-02, PNorm = 85.5008, GNorm = 0.4180, lr_0 = 1.2282e-04
Loss = 7.6758e-02, PNorm = 85.5033, GNorm = 0.5054, lr_0 = 1.2273e-04
Loss = 7.4957e-02, PNorm = 85.5075, GNorm = 0.5836, lr_0 = 1.2265e-04
Loss = 7.3007e-02, PNorm = 85.5096, GNorm = 0.6064, lr_0 = 1.2256e-04
Loss = 8.1706e-02, PNorm = 85.5094, GNorm = 0.6404, lr_0 = 1.2248e-04
Loss = 8.1731e-02, PNorm = 85.5103, GNorm = 0.6437, lr_0 = 1.2240e-04
Loss = 7.6200e-02, PNorm = 85.5138, GNorm = 0.6653, lr_0 = 1.2231e-04
Loss = 7.3961e-02, PNorm = 85.5155, GNorm = 0.5724, lr_0 = 1.2223e-04
Loss = 7.7776e-02, PNorm = 85.5166, GNorm = 0.5592, lr_0 = 1.2214e-04
Loss = 7.1646e-02, PNorm = 85.5178, GNorm = 0.7952, lr_0 = 1.2206e-04
Loss = 7.7923e-02, PNorm = 85.5200, GNorm = 0.7138, lr_0 = 1.2198e-04
Loss = 7.9255e-02, PNorm = 85.5201, GNorm = 0.7250, lr_0 = 1.2189e-04
Loss = 8.4003e-02, PNorm = 85.5208, GNorm = 0.5699, lr_0 = 1.2181e-04
Loss = 7.2914e-02, PNorm = 85.5239, GNorm = 0.5993, lr_0 = 1.2173e-04
Loss = 8.2773e-02, PNorm = 85.5258, GNorm = 0.6124, lr_0 = 1.2164e-04
Loss = 7.1913e-02, PNorm = 85.5273, GNorm = 0.5551, lr_0 = 1.2156e-04
Loss = 7.8147e-02, PNorm = 85.5290, GNorm = 0.5070, lr_0 = 1.2148e-04
Loss = 7.8864e-02, PNorm = 85.5301, GNorm = 0.5203, lr_0 = 1.2139e-04
Loss = 7.0137e-02, PNorm = 85.5327, GNorm = 0.5495, lr_0 = 1.2131e-04
Loss = 7.8141e-02, PNorm = 85.5346, GNorm = 1.0292, lr_0 = 1.2123e-04
Loss = 7.3739e-02, PNorm = 85.5378, GNorm = 0.4613, lr_0 = 1.2114e-04
Loss = 8.6080e-02, PNorm = 85.5410, GNorm = 0.6089, lr_0 = 1.2106e-04
Loss = 7.0986e-02, PNorm = 85.5430, GNorm = 0.5762, lr_0 = 1.2098e-04
Loss = 7.2333e-02, PNorm = 85.5434, GNorm = 0.7338, lr_0 = 1.2090e-04
Loss = 7.1180e-02, PNorm = 85.5459, GNorm = 0.6457, lr_0 = 1.2081e-04
Loss = 7.7535e-02, PNorm = 85.5492, GNorm = 0.5480, lr_0 = 1.2073e-04
Loss = 7.7947e-02, PNorm = 85.5496, GNorm = 0.6367, lr_0 = 1.2065e-04
Loss = 7.7669e-02, PNorm = 85.5503, GNorm = 0.6754, lr_0 = 1.2056e-04
Loss = 7.8493e-02, PNorm = 85.5532, GNorm = 0.7083, lr_0 = 1.2048e-04
Loss = 8.8841e-02, PNorm = 85.5549, GNorm = 0.6637, lr_0 = 1.2040e-04
Loss = 7.5840e-02, PNorm = 85.5559, GNorm = 0.8207, lr_0 = 1.2032e-04
Loss = 8.6428e-02, PNorm = 85.5572, GNorm = 0.7608, lr_0 = 1.2023e-04
Loss = 8.2611e-02, PNorm = 85.5583, GNorm = 0.6337, lr_0 = 1.2015e-04
Loss = 8.0183e-02, PNorm = 85.5602, GNorm = 0.5790, lr_0 = 1.2007e-04
Loss = 7.9692e-02, PNorm = 85.5612, GNorm = 0.5942, lr_0 = 1.1999e-04
Loss = 7.4239e-02, PNorm = 85.5611, GNorm = 0.5842, lr_0 = 1.1991e-04
Loss = 7.2341e-02, PNorm = 85.5614, GNorm = 0.5107, lr_0 = 1.1982e-04
Loss = 7.7693e-02, PNorm = 85.5624, GNorm = 0.6109, lr_0 = 1.1974e-04
Loss = 7.4744e-02, PNorm = 85.5647, GNorm = 0.5324, lr_0 = 1.1966e-04
Loss = 7.2654e-02, PNorm = 85.5663, GNorm = 0.8000, lr_0 = 1.1958e-04
Loss = 8.0683e-02, PNorm = 85.5666, GNorm = 0.7375, lr_0 = 1.1950e-04
Loss = 7.8222e-02, PNorm = 85.5675, GNorm = 0.5057, lr_0 = 1.1941e-04
Loss = 7.3171e-02, PNorm = 85.5684, GNorm = 0.4902, lr_0 = 1.1933e-04
Loss = 6.4198e-02, PNorm = 85.5692, GNorm = 0.7455, lr_0 = 1.1925e-04
Loss = 9.0373e-02, PNorm = 85.5714, GNorm = 0.6299, lr_0 = 1.1917e-04
Loss = 7.9968e-02, PNorm = 85.5732, GNorm = 0.6568, lr_0 = 1.1909e-04
Loss = 7.7971e-02, PNorm = 85.5745, GNorm = 0.6647, lr_0 = 1.1901e-04
Loss = 7.6291e-02, PNorm = 85.5761, GNorm = 0.7350, lr_0 = 1.1892e-04
Loss = 8.3269e-02, PNorm = 85.5774, GNorm = 0.6200, lr_0 = 1.1884e-04
Loss = 9.0792e-02, PNorm = 85.5775, GNorm = 0.4974, lr_0 = 1.1876e-04
Loss = 8.2443e-02, PNorm = 85.5797, GNorm = 0.6643, lr_0 = 1.1868e-04
Loss = 8.0960e-02, PNorm = 85.5833, GNorm = 0.5608, lr_0 = 1.1860e-04
Loss = 6.9074e-02, PNorm = 85.5859, GNorm = 0.5907, lr_0 = 1.1852e-04
Loss = 7.9395e-02, PNorm = 85.5865, GNorm = 0.7369, lr_0 = 1.1844e-04
Loss = 8.4216e-02, PNorm = 85.5877, GNorm = 0.5979, lr_0 = 1.1835e-04
Loss = 7.2087e-02, PNorm = 85.5895, GNorm = 0.5502, lr_0 = 1.1827e-04
Loss = 7.8500e-02, PNorm = 85.5912, GNorm = 0.5242, lr_0 = 1.1819e-04
Loss = 7.2154e-02, PNorm = 85.5912, GNorm = 0.5907, lr_0 = 1.1811e-04
Loss = 7.3823e-02, PNorm = 85.5914, GNorm = 0.7324, lr_0 = 1.1803e-04
Loss = 7.9789e-02, PNorm = 85.5937, GNorm = 0.6755, lr_0 = 1.1795e-04
Loss = 8.2366e-02, PNorm = 85.5961, GNorm = 0.6819, lr_0 = 1.1787e-04
Validation mae = 0.227205
Epoch 28
Loss = 7.6270e-02, PNorm = 85.5968, GNorm = 0.5499, lr_0 = 1.1779e-04
Loss = 7.1058e-02, PNorm = 85.5985, GNorm = 0.5509, lr_0 = 1.1771e-04
Loss = 7.2786e-02, PNorm = 85.6013, GNorm = 0.6604, lr_0 = 1.1763e-04
Loss = 8.3747e-02, PNorm = 85.6039, GNorm = 0.7566, lr_0 = 1.1755e-04
Loss = 6.7170e-02, PNorm = 85.6064, GNorm = 0.6275, lr_0 = 1.1747e-04
Loss = 8.1264e-02, PNorm = 85.6078, GNorm = 0.6738, lr_0 = 1.1739e-04
Loss = 7.0512e-02, PNorm = 85.6092, GNorm = 0.6335, lr_0 = 1.1730e-04
Loss = 7.3438e-02, PNorm = 85.6103, GNorm = 1.0310, lr_0 = 1.1722e-04
Loss = 7.8970e-02, PNorm = 85.6117, GNorm = 0.7328, lr_0 = 1.1714e-04
Loss = 7.2059e-02, PNorm = 85.6150, GNorm = 0.6071, lr_0 = 1.1706e-04
Loss = 6.8345e-02, PNorm = 85.6177, GNorm = 0.5814, lr_0 = 1.1698e-04
Loss = 7.3214e-02, PNorm = 85.6185, GNorm = 0.5741, lr_0 = 1.1690e-04
Loss = 7.9852e-02, PNorm = 85.6177, GNorm = 0.4277, lr_0 = 1.1682e-04
Loss = 7.3584e-02, PNorm = 85.6183, GNorm = 0.6617, lr_0 = 1.1674e-04
Loss = 7.4683e-02, PNorm = 85.6211, GNorm = 0.5628, lr_0 = 1.1666e-04
Loss = 5.5473e-02, PNorm = 85.6246, GNorm = 0.5057, lr_0 = 1.1658e-04
Loss = 6.6621e-02, PNorm = 85.6272, GNorm = 0.6259, lr_0 = 1.1650e-04
Loss = 7.2527e-02, PNorm = 85.6297, GNorm = 0.7057, lr_0 = 1.1642e-04
Loss = 7.7961e-02, PNorm = 85.6336, GNorm = 0.5337, lr_0 = 1.1634e-04
Loss = 8.1135e-02, PNorm = 85.6360, GNorm = 0.6509, lr_0 = 1.1626e-04
Loss = 7.1697e-02, PNorm = 85.6366, GNorm = 0.6724, lr_0 = 1.1618e-04
Loss = 8.3462e-02, PNorm = 85.6383, GNorm = 0.5808, lr_0 = 1.1611e-04
Loss = 7.1014e-02, PNorm = 85.6386, GNorm = 0.5661, lr_0 = 1.1603e-04
Loss = 8.3654e-02, PNorm = 85.6391, GNorm = 0.6299, lr_0 = 1.1595e-04
Loss = 7.7967e-02, PNorm = 85.6397, GNorm = 0.4436, lr_0 = 1.1587e-04
Loss = 8.1539e-02, PNorm = 85.6420, GNorm = 0.5608, lr_0 = 1.1579e-04
Loss = 7.3186e-02, PNorm = 85.6434, GNorm = 0.8200, lr_0 = 1.1571e-04
Loss = 7.7534e-02, PNorm = 85.6451, GNorm = 0.8284, lr_0 = 1.1563e-04
Loss = 6.7799e-02, PNorm = 85.6465, GNorm = 0.5210, lr_0 = 1.1555e-04
Loss = 7.8805e-02, PNorm = 85.6480, GNorm = 0.5687, lr_0 = 1.1547e-04
Loss = 7.7589e-02, PNorm = 85.6499, GNorm = 0.6271, lr_0 = 1.1539e-04
Loss = 7.2462e-02, PNorm = 85.6508, GNorm = 0.5995, lr_0 = 1.1531e-04
Loss = 8.0251e-02, PNorm = 85.6519, GNorm = 0.5344, lr_0 = 1.1523e-04
Loss = 7.5229e-02, PNorm = 85.6532, GNorm = 0.6082, lr_0 = 1.1515e-04
Loss = 6.7240e-02, PNorm = 85.6554, GNorm = 0.6412, lr_0 = 1.1508e-04
Loss = 7.5707e-02, PNorm = 85.6570, GNorm = 0.6423, lr_0 = 1.1500e-04
Loss = 6.6242e-02, PNorm = 85.6556, GNorm = 0.4982, lr_0 = 1.1492e-04
Loss = 8.0866e-02, PNorm = 85.6560, GNorm = 0.7825, lr_0 = 1.1484e-04
Loss = 8.3130e-02, PNorm = 85.6573, GNorm = 0.7954, lr_0 = 1.1476e-04
Loss = 6.9667e-02, PNorm = 85.6598, GNorm = 0.6430, lr_0 = 1.1468e-04
Loss = 7.6026e-02, PNorm = 85.6617, GNorm = 0.6219, lr_0 = 1.1460e-04
Loss = 6.5915e-02, PNorm = 85.6625, GNorm = 0.5376, lr_0 = 1.1452e-04
Loss = 8.0337e-02, PNorm = 85.6637, GNorm = 0.6931, lr_0 = 1.1445e-04
Loss = 6.8619e-02, PNorm = 85.6655, GNorm = 0.5331, lr_0 = 1.1437e-04
Loss = 7.1892e-02, PNorm = 85.6674, GNorm = 0.6063, lr_0 = 1.1429e-04
Loss = 7.1741e-02, PNorm = 85.6680, GNorm = 0.5083, lr_0 = 1.1421e-04
Loss = 8.5873e-02, PNorm = 85.6694, GNorm = 0.7387, lr_0 = 1.1413e-04
Loss = 8.5910e-02, PNorm = 85.6706, GNorm = 0.5834, lr_0 = 1.1405e-04
Loss = 7.8688e-02, PNorm = 85.6734, GNorm = 0.6296, lr_0 = 1.1398e-04
Loss = 7.5698e-02, PNorm = 85.6759, GNorm = 0.5646, lr_0 = 1.1390e-04
Loss = 7.5691e-02, PNorm = 85.6763, GNorm = 0.6439, lr_0 = 1.1382e-04
Loss = 7.6089e-02, PNorm = 85.6774, GNorm = 0.6570, lr_0 = 1.1374e-04
Loss = 7.2649e-02, PNorm = 85.6810, GNorm = 0.8398, lr_0 = 1.1366e-04
Loss = 7.0035e-02, PNorm = 85.6832, GNorm = 0.6899, lr_0 = 1.1359e-04
Loss = 7.5221e-02, PNorm = 85.6838, GNorm = 0.7286, lr_0 = 1.1351e-04
Loss = 7.7011e-02, PNorm = 85.6855, GNorm = 0.5821, lr_0 = 1.1343e-04
Loss = 8.1442e-02, PNorm = 85.6887, GNorm = 0.8291, lr_0 = 1.1335e-04
Loss = 8.0270e-02, PNorm = 85.6905, GNorm = 0.6741, lr_0 = 1.1328e-04
Loss = 6.8338e-02, PNorm = 85.6917, GNorm = 0.5129, lr_0 = 1.1320e-04
Loss = 7.7037e-02, PNorm = 85.6939, GNorm = 0.6157, lr_0 = 1.1312e-04
Loss = 7.9833e-02, PNorm = 85.6951, GNorm = 0.5106, lr_0 = 1.1304e-04
Loss = 8.4407e-02, PNorm = 85.6952, GNorm = 0.7257, lr_0 = 1.1297e-04
Loss = 7.4457e-02, PNorm = 85.6955, GNorm = 0.5839, lr_0 = 1.1289e-04
Loss = 8.0980e-02, PNorm = 85.6988, GNorm = 0.7400, lr_0 = 1.1281e-04
Loss = 6.7317e-02, PNorm = 85.7004, GNorm = 0.5550, lr_0 = 1.1273e-04
Loss = 8.0156e-02, PNorm = 85.7022, GNorm = 0.6106, lr_0 = 1.1266e-04
Loss = 7.7232e-02, PNorm = 85.7050, GNorm = 0.4509, lr_0 = 1.1258e-04
Loss = 8.5937e-02, PNorm = 85.7069, GNorm = 0.7369, lr_0 = 1.1250e-04
Loss = 7.1070e-02, PNorm = 85.7089, GNorm = 0.5065, lr_0 = 1.1243e-04
Loss = 6.8828e-02, PNorm = 85.7110, GNorm = 0.6097, lr_0 = 1.1235e-04
Loss = 8.0957e-02, PNorm = 85.7128, GNorm = 0.6399, lr_0 = 1.1227e-04
Loss = 7.6000e-02, PNorm = 85.7131, GNorm = 0.7360, lr_0 = 1.1219e-04
Loss = 7.3870e-02, PNorm = 85.7138, GNorm = 0.7709, lr_0 = 1.1212e-04
Loss = 7.6707e-02, PNorm = 85.7166, GNorm = 0.7352, lr_0 = 1.1204e-04
Loss = 7.6907e-02, PNorm = 85.7171, GNorm = 0.5271, lr_0 = 1.1196e-04
Loss = 7.0544e-02, PNorm = 85.7183, GNorm = 0.6027, lr_0 = 1.1189e-04
Loss = 7.7623e-02, PNorm = 85.7205, GNorm = 0.8349, lr_0 = 1.1181e-04
Loss = 7.3744e-02, PNorm = 85.7224, GNorm = 0.8637, lr_0 = 1.1173e-04
Loss = 8.5047e-02, PNorm = 85.7248, GNorm = 0.7459, lr_0 = 1.1166e-04
Loss = 8.8396e-02, PNorm = 85.7254, GNorm = 0.7262, lr_0 = 1.1158e-04
Loss = 8.1732e-02, PNorm = 85.7273, GNorm = 0.5909, lr_0 = 1.1150e-04
Loss = 8.0496e-02, PNorm = 85.7286, GNorm = 0.6832, lr_0 = 1.1143e-04
Loss = 7.3873e-02, PNorm = 85.7289, GNorm = 0.7525, lr_0 = 1.1135e-04
Loss = 7.7340e-02, PNorm = 85.7302, GNorm = 0.6984, lr_0 = 1.1128e-04
Loss = 7.7350e-02, PNorm = 85.7326, GNorm = 0.6608, lr_0 = 1.1120e-04
Loss = 7.8543e-02, PNorm = 85.7337, GNorm = 0.6126, lr_0 = 1.1112e-04
Loss = 6.9960e-02, PNorm = 85.7341, GNorm = 0.6467, lr_0 = 1.1105e-04
Loss = 7.7165e-02, PNorm = 85.7355, GNorm = 0.7274, lr_0 = 1.1097e-04
Loss = 6.6426e-02, PNorm = 85.7367, GNorm = 0.6228, lr_0 = 1.1089e-04
Loss = 7.5149e-02, PNorm = 85.7376, GNorm = 0.7095, lr_0 = 1.1082e-04
Loss = 7.5702e-02, PNorm = 85.7396, GNorm = 0.7005, lr_0 = 1.1074e-04
Loss = 7.6238e-02, PNorm = 85.7419, GNorm = 0.6702, lr_0 = 1.1067e-04
Loss = 8.0187e-02, PNorm = 85.7434, GNorm = 0.6562, lr_0 = 1.1059e-04
Loss = 7.7734e-02, PNorm = 85.7451, GNorm = 0.9817, lr_0 = 1.1052e-04
Loss = 7.5627e-02, PNorm = 85.7478, GNorm = 0.5955, lr_0 = 1.1044e-04
Loss = 7.3067e-02, PNorm = 85.7490, GNorm = 0.6474, lr_0 = 1.1036e-04
Loss = 7.4267e-02, PNorm = 85.7508, GNorm = 0.5680, lr_0 = 1.1029e-04
Loss = 7.1299e-02, PNorm = 85.7535, GNorm = 0.7236, lr_0 = 1.1021e-04
Loss = 7.4647e-02, PNorm = 85.7534, GNorm = 0.6443, lr_0 = 1.1014e-04
Loss = 8.4418e-02, PNorm = 85.7546, GNorm = 0.5508, lr_0 = 1.1006e-04
Loss = 7.4107e-02, PNorm = 85.7568, GNorm = 0.4448, lr_0 = 1.0999e-04
Loss = 6.9783e-02, PNorm = 85.7582, GNorm = 0.6664, lr_0 = 1.0991e-04
Loss = 8.0738e-02, PNorm = 85.7601, GNorm = 0.7331, lr_0 = 1.0984e-04
Loss = 7.3659e-02, PNorm = 85.7617, GNorm = 0.6540, lr_0 = 1.0976e-04
Loss = 8.0088e-02, PNorm = 85.7637, GNorm = 0.6498, lr_0 = 1.0969e-04
Loss = 6.8938e-02, PNorm = 85.7656, GNorm = 0.4898, lr_0 = 1.0961e-04
Loss = 8.2297e-02, PNorm = 85.7681, GNorm = 0.5730, lr_0 = 1.0954e-04
Loss = 7.2706e-02, PNorm = 85.7688, GNorm = 0.4991, lr_0 = 1.0946e-04
Loss = 8.5539e-02, PNorm = 85.7707, GNorm = 0.6774, lr_0 = 1.0939e-04
Loss = 8.1126e-02, PNorm = 85.7736, GNorm = 0.6180, lr_0 = 1.0931e-04
Loss = 6.7847e-02, PNorm = 85.7769, GNorm = 0.4858, lr_0 = 1.0924e-04
Loss = 8.3959e-02, PNorm = 85.7798, GNorm = 0.8886, lr_0 = 1.0916e-04
Loss = 9.2037e-02, PNorm = 85.7798, GNorm = 0.9495, lr_0 = 1.0909e-04
Loss = 8.3396e-02, PNorm = 85.7812, GNorm = 0.6674, lr_0 = 1.0901e-04
Loss = 8.2935e-02, PNorm = 85.7825, GNorm = 0.6668, lr_0 = 1.0894e-04
Loss = 7.5125e-02, PNorm = 85.7855, GNorm = 0.4635, lr_0 = 1.0886e-04
Loss = 7.9791e-02, PNorm = 85.7873, GNorm = 0.6390, lr_0 = 1.0879e-04
Loss = 8.2515e-02, PNorm = 85.7870, GNorm = 0.7304, lr_0 = 1.0871e-04
Loss = 7.1696e-02, PNorm = 85.7882, GNorm = 0.5705, lr_0 = 1.0864e-04
Loss = 6.7142e-02, PNorm = 85.7906, GNorm = 0.5592, lr_0 = 1.0856e-04
Validation mae = 0.227279
Epoch 29
Loss = 7.2298e-02, PNorm = 85.7913, GNorm = 0.6702, lr_0 = 1.0849e-04
Loss = 6.6344e-02, PNorm = 85.7920, GNorm = 0.7654, lr_0 = 1.0841e-04
Loss = 6.3959e-02, PNorm = 85.7926, GNorm = 0.5963, lr_0 = 1.0834e-04
Loss = 8.2653e-02, PNorm = 85.7942, GNorm = 0.7491, lr_0 = 1.0827e-04
Loss = 6.9066e-02, PNorm = 85.7954, GNorm = 0.6594, lr_0 = 1.0819e-04
Loss = 8.1447e-02, PNorm = 85.7950, GNorm = 0.6080, lr_0 = 1.0812e-04
Loss = 7.2036e-02, PNorm = 85.7951, GNorm = 0.5355, lr_0 = 1.0804e-04
Loss = 6.7740e-02, PNorm = 85.7951, GNorm = 0.6052, lr_0 = 1.0797e-04
Loss = 7.3951e-02, PNorm = 85.7963, GNorm = 0.8792, lr_0 = 1.0790e-04
Loss = 7.4296e-02, PNorm = 85.7992, GNorm = 0.6033, lr_0 = 1.0782e-04
Loss = 7.5508e-02, PNorm = 85.8005, GNorm = 0.5026, lr_0 = 1.0775e-04
Loss = 7.3643e-02, PNorm = 85.8007, GNorm = 0.8302, lr_0 = 1.0767e-04
Loss = 7.4029e-02, PNorm = 85.8014, GNorm = 0.5829, lr_0 = 1.0760e-04
Loss = 7.5105e-02, PNorm = 85.8045, GNorm = 0.7055, lr_0 = 1.0753e-04
Loss = 6.8527e-02, PNorm = 85.8053, GNorm = 0.7513, lr_0 = 1.0745e-04
Loss = 7.4498e-02, PNorm = 85.8059, GNorm = 0.6959, lr_0 = 1.0738e-04
Loss = 8.2111e-02, PNorm = 85.8078, GNorm = 0.6694, lr_0 = 1.0731e-04
Loss = 6.5576e-02, PNorm = 85.8121, GNorm = 0.6305, lr_0 = 1.0723e-04
Loss = 7.4681e-02, PNorm = 85.8149, GNorm = 0.7059, lr_0 = 1.0716e-04
Loss = 7.8144e-02, PNorm = 85.8168, GNorm = 0.7843, lr_0 = 1.0709e-04
Loss = 8.5143e-02, PNorm = 85.8179, GNorm = 0.7245, lr_0 = 1.0701e-04
Loss = 7.5477e-02, PNorm = 85.8192, GNorm = 0.8504, lr_0 = 1.0694e-04
Loss = 7.4294e-02, PNorm = 85.8200, GNorm = 0.7003, lr_0 = 1.0687e-04
Loss = 8.1011e-02, PNorm = 85.8219, GNorm = 0.5923, lr_0 = 1.0679e-04
Loss = 7.0331e-02, PNorm = 85.8239, GNorm = 0.4761, lr_0 = 1.0672e-04
Loss = 7.5872e-02, PNorm = 85.8258, GNorm = 0.5387, lr_0 = 1.0665e-04
Loss = 7.2390e-02, PNorm = 85.8271, GNorm = 0.6333, lr_0 = 1.0657e-04
Loss = 7.7379e-02, PNorm = 85.8291, GNorm = 0.6001, lr_0 = 1.0650e-04
Loss = 7.3812e-02, PNorm = 85.8319, GNorm = 0.7104, lr_0 = 1.0643e-04
Loss = 7.1186e-02, PNorm = 85.8325, GNorm = 0.7058, lr_0 = 1.0635e-04
Loss = 7.8600e-02, PNorm = 85.8336, GNorm = 0.5590, lr_0 = 1.0628e-04
Loss = 7.8932e-02, PNorm = 85.8355, GNorm = 0.5143, lr_0 = 1.0621e-04
Loss = 7.2602e-02, PNorm = 85.8367, GNorm = 0.7040, lr_0 = 1.0614e-04
Loss = 7.4391e-02, PNorm = 85.8369, GNorm = 0.6457, lr_0 = 1.0606e-04
Loss = 7.9626e-02, PNorm = 85.8370, GNorm = 0.7607, lr_0 = 1.0599e-04
Loss = 7.1513e-02, PNorm = 85.8379, GNorm = 0.6627, lr_0 = 1.0592e-04
Loss = 8.5932e-02, PNorm = 85.8397, GNorm = 0.6016, lr_0 = 1.0585e-04
Loss = 7.4464e-02, PNorm = 85.8415, GNorm = 0.6266, lr_0 = 1.0577e-04
Loss = 7.9022e-02, PNorm = 85.8408, GNorm = 0.5022, lr_0 = 1.0570e-04
Loss = 7.3992e-02, PNorm = 85.8409, GNorm = 0.3946, lr_0 = 1.0563e-04
Loss = 7.3952e-02, PNorm = 85.8418, GNorm = 0.7082, lr_0 = 1.0556e-04
Loss = 6.6566e-02, PNorm = 85.8436, GNorm = 0.5670, lr_0 = 1.0548e-04
Loss = 8.2114e-02, PNorm = 85.8461, GNorm = 0.6778, lr_0 = 1.0541e-04
Loss = 6.7338e-02, PNorm = 85.8472, GNorm = 0.3871, lr_0 = 1.0534e-04
Loss = 7.1913e-02, PNorm = 85.8483, GNorm = 0.7046, lr_0 = 1.0527e-04
Loss = 6.9801e-02, PNorm = 85.8492, GNorm = 0.5964, lr_0 = 1.0519e-04
Loss = 7.0932e-02, PNorm = 85.8511, GNorm = 0.4826, lr_0 = 1.0512e-04
Loss = 7.0779e-02, PNorm = 85.8518, GNorm = 0.6073, lr_0 = 1.0505e-04
Loss = 7.6184e-02, PNorm = 85.8532, GNorm = 0.6079, lr_0 = 1.0498e-04
Loss = 7.3113e-02, PNorm = 85.8554, GNorm = 0.7097, lr_0 = 1.0491e-04
Loss = 8.0365e-02, PNorm = 85.8587, GNorm = 0.5534, lr_0 = 1.0483e-04
Loss = 7.0282e-02, PNorm = 85.8615, GNorm = 0.5540, lr_0 = 1.0476e-04
Loss = 7.3253e-02, PNorm = 85.8647, GNorm = 0.6423, lr_0 = 1.0469e-04
Loss = 7.0768e-02, PNorm = 85.8682, GNorm = 0.5349, lr_0 = 1.0462e-04
Loss = 7.5836e-02, PNorm = 85.8695, GNorm = 0.4846, lr_0 = 1.0455e-04
Loss = 7.6563e-02, PNorm = 85.8714, GNorm = 0.8021, lr_0 = 1.0448e-04
Loss = 6.7611e-02, PNorm = 85.8738, GNorm = 0.5471, lr_0 = 1.0440e-04
Loss = 7.1025e-02, PNorm = 85.8751, GNorm = 0.5569, lr_0 = 1.0433e-04
Loss = 7.2109e-02, PNorm = 85.8749, GNorm = 0.4824, lr_0 = 1.0426e-04
Loss = 8.0625e-02, PNorm = 85.8755, GNorm = 0.5978, lr_0 = 1.0419e-04
Loss = 7.8757e-02, PNorm = 85.8772, GNorm = 0.7504, lr_0 = 1.0412e-04
Loss = 7.7927e-02, PNorm = 85.8790, GNorm = 0.7009, lr_0 = 1.0405e-04
Loss = 6.7339e-02, PNorm = 85.8793, GNorm = 0.5169, lr_0 = 1.0398e-04
Loss = 8.1666e-02, PNorm = 85.8803, GNorm = 0.6369, lr_0 = 1.0391e-04
Loss = 6.0283e-02, PNorm = 85.8817, GNorm = 0.5453, lr_0 = 1.0383e-04
Loss = 8.2169e-02, PNorm = 85.8831, GNorm = 0.5794, lr_0 = 1.0376e-04
Loss = 6.9593e-02, PNorm = 85.8830, GNorm = 0.6729, lr_0 = 1.0369e-04
Loss = 9.0850e-02, PNorm = 85.8847, GNorm = 0.6217, lr_0 = 1.0362e-04
Loss = 7.9307e-02, PNorm = 85.8878, GNorm = 0.8648, lr_0 = 1.0355e-04
Loss = 6.9302e-02, PNorm = 85.8888, GNorm = 0.7974, lr_0 = 1.0348e-04
Loss = 8.1623e-02, PNorm = 85.8901, GNorm = 0.6237, lr_0 = 1.0341e-04
Loss = 7.4720e-02, PNorm = 85.8930, GNorm = 0.6077, lr_0 = 1.0334e-04
Loss = 6.4477e-02, PNorm = 85.8953, GNorm = 0.5765, lr_0 = 1.0327e-04
Loss = 7.7289e-02, PNorm = 85.8973, GNorm = 0.5858, lr_0 = 1.0320e-04
Loss = 7.6123e-02, PNorm = 85.8979, GNorm = 0.6449, lr_0 = 1.0312e-04
Loss = 7.0399e-02, PNorm = 85.8979, GNorm = 0.7393, lr_0 = 1.0305e-04
Loss = 7.3564e-02, PNorm = 85.8986, GNorm = 0.5315, lr_0 = 1.0298e-04
Loss = 7.6981e-02, PNorm = 85.9013, GNorm = 0.6123, lr_0 = 1.0291e-04
Loss = 7.4504e-02, PNorm = 85.9058, GNorm = 0.5419, lr_0 = 1.0284e-04
Loss = 7.7618e-02, PNorm = 85.9077, GNorm = 0.5238, lr_0 = 1.0277e-04
Loss = 6.8414e-02, PNorm = 85.9107, GNorm = 0.6504, lr_0 = 1.0270e-04
Loss = 7.4972e-02, PNorm = 85.9125, GNorm = 0.7071, lr_0 = 1.0263e-04
Loss = 7.3150e-02, PNorm = 85.9142, GNorm = 0.7560, lr_0 = 1.0256e-04
Loss = 7.3252e-02, PNorm = 85.9176, GNorm = 0.6920, lr_0 = 1.0249e-04
Loss = 7.6998e-02, PNorm = 85.9195, GNorm = 0.6509, lr_0 = 1.0242e-04
Loss = 8.2431e-02, PNorm = 85.9194, GNorm = 0.5859, lr_0 = 1.0235e-04
Loss = 8.0247e-02, PNorm = 85.9233, GNorm = 0.4897, lr_0 = 1.0228e-04
Loss = 7.5561e-02, PNorm = 85.9265, GNorm = 0.5945, lr_0 = 1.0221e-04
Loss = 7.6809e-02, PNorm = 85.9278, GNorm = 0.5968, lr_0 = 1.0214e-04
Loss = 7.6604e-02, PNorm = 85.9277, GNorm = 0.7329, lr_0 = 1.0207e-04
Loss = 7.2726e-02, PNorm = 85.9301, GNorm = 0.5085, lr_0 = 1.0200e-04
Loss = 8.6435e-02, PNorm = 85.9319, GNorm = 0.7199, lr_0 = 1.0193e-04
Loss = 8.6626e-02, PNorm = 85.9335, GNorm = 0.5940, lr_0 = 1.0186e-04
Loss = 6.8325e-02, PNorm = 85.9340, GNorm = 0.7689, lr_0 = 1.0179e-04
Loss = 7.9839e-02, PNorm = 85.9337, GNorm = 0.6421, lr_0 = 1.0172e-04
Loss = 7.1587e-02, PNorm = 85.9345, GNorm = 0.5666, lr_0 = 1.0165e-04
Loss = 7.2291e-02, PNorm = 85.9360, GNorm = 0.7111, lr_0 = 1.0158e-04
Loss = 8.9535e-02, PNorm = 85.9371, GNorm = 1.0901, lr_0 = 1.0151e-04
Loss = 7.4100e-02, PNorm = 85.9392, GNorm = 0.6266, lr_0 = 1.0144e-04
Loss = 7.7638e-02, PNorm = 85.9411, GNorm = 0.5783, lr_0 = 1.0137e-04
Loss = 7.1867e-02, PNorm = 85.9415, GNorm = 0.5804, lr_0 = 1.0130e-04
Loss = 7.6187e-02, PNorm = 85.9430, GNorm = 0.7628, lr_0 = 1.0123e-04
Loss = 8.2790e-02, PNorm = 85.9448, GNorm = 0.5760, lr_0 = 1.0116e-04
Loss = 7.1265e-02, PNorm = 85.9446, GNorm = 0.6052, lr_0 = 1.0110e-04
Loss = 7.0282e-02, PNorm = 85.9467, GNorm = 0.7087, lr_0 = 1.0103e-04
Loss = 7.6324e-02, PNorm = 85.9487, GNorm = 0.6268, lr_0 = 1.0096e-04
Loss = 7.6128e-02, PNorm = 85.9499, GNorm = 0.5271, lr_0 = 1.0089e-04
Loss = 6.8807e-02, PNorm = 85.9505, GNorm = 0.6042, lr_0 = 1.0082e-04
Loss = 7.8313e-02, PNorm = 85.9504, GNorm = 0.8713, lr_0 = 1.0075e-04
Loss = 9.2752e-02, PNorm = 85.9514, GNorm = 0.6994, lr_0 = 1.0068e-04
Loss = 7.5053e-02, PNorm = 85.9523, GNorm = 0.6761, lr_0 = 1.0061e-04
Loss = 6.9877e-02, PNorm = 85.9543, GNorm = 0.6071, lr_0 = 1.0054e-04
Loss = 8.3121e-02, PNorm = 85.9549, GNorm = 0.6699, lr_0 = 1.0047e-04
Loss = 8.7443e-02, PNorm = 85.9560, GNorm = 0.7672, lr_0 = 1.0041e-04
Loss = 8.8299e-02, PNorm = 85.9586, GNorm = 0.9159, lr_0 = 1.0034e-04
Loss = 7.9128e-02, PNorm = 85.9607, GNorm = 0.6506, lr_0 = 1.0027e-04
Loss = 7.3880e-02, PNorm = 85.9630, GNorm = 0.5578, lr_0 = 1.0020e-04
Loss = 8.0736e-02, PNorm = 85.9657, GNorm = 0.7265, lr_0 = 1.0013e-04
Loss = 7.1146e-02, PNorm = 85.9666, GNorm = 0.8493, lr_0 = 1.0006e-04
Loss = 7.6683e-02, PNorm = 85.9675, GNorm = 0.7642, lr_0 = 1.0000e-04
Validation mae = 0.225510
Model 0 best validation mae = 0.225510 on epoch 29
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.226333
Ensemble test mae = 0.226333
Fold 3
Splitting data with seed 3
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 9.5584e-01, PNorm = 47.8615, GNorm = 2.6330, lr_0 = 1.0413e-04
Loss = 9.5854e-01, PNorm = 47.8628, GNorm = 4.4182, lr_0 = 1.0788e-04
Loss = 1.0386e+00, PNorm = 47.8645, GNorm = 3.9496, lr_0 = 1.1163e-04
Loss = 9.5253e-01, PNorm = 47.8663, GNorm = 2.8695, lr_0 = 1.1537e-04
Loss = 9.9183e-01, PNorm = 47.8686, GNorm = 3.4460, lr_0 = 1.1913e-04
Loss = 9.3850e-01, PNorm = 47.8722, GNorm = 2.8325, lr_0 = 1.2287e-04
Loss = 8.6395e-01, PNorm = 47.8777, GNorm = 6.2112, lr_0 = 1.2663e-04
Loss = 7.3697e-01, PNorm = 47.8851, GNorm = 1.9180, lr_0 = 1.3038e-04
Loss = 6.5432e-01, PNorm = 47.8934, GNorm = 2.5022, lr_0 = 1.3413e-04
Loss = 5.2969e-01, PNorm = 47.9001, GNorm = 2.0459, lr_0 = 1.3788e-04
Loss = 5.7572e-01, PNorm = 47.9037, GNorm = 9.2950, lr_0 = 1.4163e-04
Loss = 5.4949e-01, PNorm = 47.9097, GNorm = 10.5209, lr_0 = 1.4537e-04
Loss = 5.3495e-01, PNorm = 47.9155, GNorm = 27.5481, lr_0 = 1.4913e-04
Loss = 5.2458e-01, PNorm = 47.9201, GNorm = 5.8034, lr_0 = 1.5288e-04
Loss = 5.0280e-01, PNorm = 47.9241, GNorm = 6.6703, lr_0 = 1.5662e-04
Loss = 4.5330e-01, PNorm = 47.9287, GNorm = 6.5083, lr_0 = 1.6038e-04
Loss = 4.9386e-01, PNorm = 47.9345, GNorm = 34.4889, lr_0 = 1.6412e-04
Loss = 5.2991e-01, PNorm = 47.9384, GNorm = 11.7338, lr_0 = 1.6788e-04
Loss = 4.5966e-01, PNorm = 47.9441, GNorm = 10.1965, lr_0 = 1.7163e-04
Loss = 3.8819e-01, PNorm = 47.9495, GNorm = 2.8134, lr_0 = 1.7538e-04
Loss = 4.2741e-01, PNorm = 47.9532, GNorm = 13.3829, lr_0 = 1.7913e-04
Loss = 4.6632e-01, PNorm = 47.9585, GNorm = 5.7006, lr_0 = 1.8288e-04
Loss = 4.5852e-01, PNorm = 47.9647, GNorm = 19.4399, lr_0 = 1.8662e-04
Loss = 3.8009e-01, PNorm = 47.9704, GNorm = 12.1503, lr_0 = 1.9038e-04
Loss = 3.7936e-01, PNorm = 47.9758, GNorm = 4.0570, lr_0 = 1.9413e-04
Loss = 3.9362e-01, PNorm = 47.9807, GNorm = 3.8698, lr_0 = 1.9788e-04
Loss = 4.0356e-01, PNorm = 47.9857, GNorm = 13.1939, lr_0 = 2.0163e-04
Loss = 4.1119e-01, PNorm = 47.9922, GNorm = 2.7052, lr_0 = 2.0537e-04
Loss = 3.8790e-01, PNorm = 47.9979, GNorm = 1.6087, lr_0 = 2.0913e-04
Loss = 4.0080e-01, PNorm = 48.0031, GNorm = 3.4418, lr_0 = 2.1288e-04
Loss = 4.2506e-01, PNorm = 48.0103, GNorm = 11.6024, lr_0 = 2.1663e-04
Loss = 4.2026e-01, PNorm = 48.0178, GNorm = 7.6900, lr_0 = 2.2038e-04
Loss = 3.8185e-01, PNorm = 48.0253, GNorm = 9.9731, lr_0 = 2.2412e-04
Loss = 3.9761e-01, PNorm = 48.0307, GNorm = 3.3417, lr_0 = 2.2787e-04
Loss = 3.5906e-01, PNorm = 48.0384, GNorm = 12.9544, lr_0 = 2.3163e-04
Loss = 3.7908e-01, PNorm = 48.0425, GNorm = 10.6741, lr_0 = 2.3538e-04
Loss = 3.8755e-01, PNorm = 48.0488, GNorm = 6.6369, lr_0 = 2.3913e-04
Loss = 3.2348e-01, PNorm = 48.0547, GNorm = 1.9359, lr_0 = 2.4288e-04
Loss = 3.7472e-01, PNorm = 48.0596, GNorm = 4.9408, lr_0 = 2.4662e-04
Loss = 4.0931e-01, PNorm = 48.0656, GNorm = 15.6475, lr_0 = 2.5038e-04
Loss = 3.5341e-01, PNorm = 48.0730, GNorm = 4.2176, lr_0 = 2.5413e-04
Loss = 3.4605e-01, PNorm = 48.0792, GNorm = 2.9915, lr_0 = 2.5788e-04
Loss = 3.5889e-01, PNorm = 48.0845, GNorm = 8.4799, lr_0 = 2.6163e-04
Loss = 3.5251e-01, PNorm = 48.0905, GNorm = 4.4223, lr_0 = 2.6537e-04
Loss = 3.1958e-01, PNorm = 48.0978, GNorm = 8.5497, lr_0 = 2.6912e-04
Loss = 3.0583e-01, PNorm = 48.0999, GNorm = 2.6756, lr_0 = 2.7288e-04
Loss = 3.8143e-01, PNorm = 48.1068, GNorm = 7.3019, lr_0 = 2.7663e-04
Loss = 3.8834e-01, PNorm = 48.1135, GNorm = 9.6443, lr_0 = 2.8038e-04
Loss = 3.9260e-01, PNorm = 48.1190, GNorm = 10.8351, lr_0 = 2.8413e-04
Loss = 3.6414e-01, PNorm = 48.1279, GNorm = 5.9005, lr_0 = 2.8787e-04
Loss = 3.8364e-01, PNorm = 48.1360, GNorm = 13.4880, lr_0 = 2.9163e-04
Loss = 3.3784e-01, PNorm = 48.1450, GNorm = 4.9145, lr_0 = 2.9538e-04
Loss = 3.1396e-01, PNorm = 48.1514, GNorm = 9.5210, lr_0 = 2.9913e-04
Loss = 2.9784e-01, PNorm = 48.1592, GNorm = 1.7555, lr_0 = 3.0288e-04
Loss = 3.1246e-01, PNorm = 48.1634, GNorm = 1.4712, lr_0 = 3.0662e-04
Loss = 3.6770e-01, PNorm = 48.1694, GNorm = 6.4652, lr_0 = 3.1037e-04
Loss = 3.1263e-01, PNorm = 48.1791, GNorm = 1.9998, lr_0 = 3.1413e-04
Loss = 2.8938e-01, PNorm = 48.1870, GNorm = 7.0724, lr_0 = 3.1788e-04
Loss = 2.9784e-01, PNorm = 48.1945, GNorm = 7.8527, lr_0 = 3.2163e-04
Loss = 3.3204e-01, PNorm = 48.1985, GNorm = 11.9277, lr_0 = 3.2538e-04
Loss = 3.4735e-01, PNorm = 48.2070, GNorm = 14.3236, lr_0 = 3.2912e-04
Loss = 3.6289e-01, PNorm = 48.2129, GNorm = 1.6890, lr_0 = 3.3288e-04
Loss = 2.9028e-01, PNorm = 48.2218, GNorm = 5.6217, lr_0 = 3.3663e-04
Loss = 2.9603e-01, PNorm = 48.2278, GNorm = 6.0016, lr_0 = 3.4038e-04
Loss = 2.9217e-01, PNorm = 48.2348, GNorm = 16.9141, lr_0 = 3.4413e-04
Loss = 3.3091e-01, PNorm = 48.2419, GNorm = 10.5970, lr_0 = 3.4787e-04
Loss = 2.6615e-01, PNorm = 48.2505, GNorm = 1.7905, lr_0 = 3.5162e-04
Loss = 2.8526e-01, PNorm = 48.2551, GNorm = 8.5750, lr_0 = 3.5538e-04
Loss = 2.9057e-01, PNorm = 48.2606, GNorm = 2.7050, lr_0 = 3.5913e-04
Loss = 2.9366e-01, PNorm = 48.2677, GNorm = 8.6581, lr_0 = 3.6288e-04
Loss = 3.7260e-01, PNorm = 48.2728, GNorm = 13.6437, lr_0 = 3.6662e-04
Loss = 3.3152e-01, PNorm = 48.2812, GNorm = 4.1285, lr_0 = 3.7037e-04
Loss = 2.9050e-01, PNorm = 48.2930, GNorm = 7.6904, lr_0 = 3.7413e-04
Loss = 3.9761e-01, PNorm = 48.3041, GNorm = 7.8849, lr_0 = 3.7788e-04
Loss = 2.9461e-01, PNorm = 48.3156, GNorm = 4.7675, lr_0 = 3.8163e-04
Loss = 2.5932e-01, PNorm = 48.3261, GNorm = 5.4440, lr_0 = 3.8537e-04
Loss = 2.6376e-01, PNorm = 48.3336, GNorm = 2.3864, lr_0 = 3.8912e-04
Loss = 3.3688e-01, PNorm = 48.3411, GNorm = 8.0024, lr_0 = 3.9287e-04
Loss = 2.9219e-01, PNorm = 48.3468, GNorm = 4.9309, lr_0 = 3.9663e-04
Loss = 3.8199e-01, PNorm = 48.3558, GNorm = 4.2547, lr_0 = 4.0038e-04
Loss = 3.5575e-01, PNorm = 48.3679, GNorm = 10.4818, lr_0 = 4.0413e-04
Loss = 3.2186e-01, PNorm = 48.3779, GNorm = 1.6157, lr_0 = 4.0787e-04
Loss = 3.0121e-01, PNorm = 48.3887, GNorm = 1.9087, lr_0 = 4.1162e-04
Loss = 3.1307e-01, PNorm = 48.3964, GNorm = 7.7791, lr_0 = 4.1537e-04
Loss = 2.9142e-01, PNorm = 48.4059, GNorm = 7.6458, lr_0 = 4.1913e-04
Loss = 3.0429e-01, PNorm = 48.4144, GNorm = 3.9728, lr_0 = 4.2288e-04
Loss = 3.0178e-01, PNorm = 48.4263, GNorm = 2.4729, lr_0 = 4.2662e-04
Loss = 3.4639e-01, PNorm = 48.4354, GNorm = 10.9011, lr_0 = 4.3037e-04
Loss = 3.9044e-01, PNorm = 48.4445, GNorm = 4.5675, lr_0 = 4.3412e-04
Loss = 3.6641e-01, PNorm = 48.4569, GNorm = 7.7300, lr_0 = 4.3788e-04
Loss = 4.0426e-01, PNorm = 48.4738, GNorm = 1.0213, lr_0 = 4.4163e-04
Loss = 3.0752e-01, PNorm = 48.4903, GNorm = 2.3302, lr_0 = 4.4538e-04
Loss = 2.8801e-01, PNorm = 48.5005, GNorm = 3.6197, lr_0 = 4.4912e-04
Loss = 2.9781e-01, PNorm = 48.5023, GNorm = 2.2916, lr_0 = 4.5287e-04
Loss = 3.1530e-01, PNorm = 48.5099, GNorm = 2.3770, lr_0 = 4.5662e-04
Loss = 3.0950e-01, PNorm = 48.5170, GNorm = 9.2044, lr_0 = 4.6038e-04
Loss = 3.1417e-01, PNorm = 48.5267, GNorm = 5.8834, lr_0 = 4.6413e-04
Loss = 2.5322e-01, PNorm = 48.5370, GNorm = 1.5908, lr_0 = 4.6787e-04
Loss = 2.8094e-01, PNorm = 48.5449, GNorm = 2.6558, lr_0 = 4.7162e-04
Loss = 3.0764e-01, PNorm = 48.5529, GNorm = 2.6671, lr_0 = 4.7537e-04
Loss = 2.9458e-01, PNorm = 48.5650, GNorm = 5.0811, lr_0 = 4.7913e-04
Loss = 3.4084e-01, PNorm = 48.5720, GNorm = 2.8571, lr_0 = 4.8288e-04
Loss = 2.7492e-01, PNorm = 48.5894, GNorm = 1.5611, lr_0 = 4.8663e-04
Loss = 2.5244e-01, PNorm = 48.5964, GNorm = 3.4517, lr_0 = 4.9038e-04
Loss = 2.5841e-01, PNorm = 48.6010, GNorm = 2.1779, lr_0 = 4.9412e-04
Loss = 3.0365e-01, PNorm = 48.6100, GNorm = 4.2499, lr_0 = 4.9788e-04
Loss = 2.8138e-01, PNorm = 48.6222, GNorm = 1.0170, lr_0 = 5.0163e-04
Loss = 2.6027e-01, PNorm = 48.6351, GNorm = 4.4621, lr_0 = 5.0538e-04
Loss = 2.9022e-01, PNorm = 48.6407, GNorm = 5.4539, lr_0 = 5.0913e-04
Loss = 3.1999e-01, PNorm = 48.6471, GNorm = 3.5190, lr_0 = 5.1287e-04
Loss = 2.5625e-01, PNorm = 48.6570, GNorm = 1.0231, lr_0 = 5.1663e-04
Loss = 2.6327e-01, PNorm = 48.6690, GNorm = 2.0287, lr_0 = 5.2038e-04
Loss = 3.1412e-01, PNorm = 48.6753, GNorm = 3.4826, lr_0 = 5.2413e-04
Loss = 3.0384e-01, PNorm = 48.6925, GNorm = 5.3648, lr_0 = 5.2788e-04
Loss = 2.6709e-01, PNorm = 48.7081, GNorm = 7.6137, lr_0 = 5.3162e-04
Loss = 2.8455e-01, PNorm = 48.7189, GNorm = 9.1630, lr_0 = 5.3538e-04
Loss = 3.0289e-01, PNorm = 48.7328, GNorm = 4.0449, lr_0 = 5.3912e-04
Loss = 2.6637e-01, PNorm = 48.7462, GNorm = 1.4680, lr_0 = 5.4288e-04
Loss = 2.8086e-01, PNorm = 48.7581, GNorm = 1.1375, lr_0 = 5.4663e-04
Loss = 2.7276e-01, PNorm = 48.7667, GNorm = 1.3232, lr_0 = 5.5038e-04
Validation mae = 0.311818
Epoch 1
Loss = 2.6156e-01, PNorm = 48.7791, GNorm = 4.2643, lr_0 = 5.5413e-04
Loss = 2.6907e-01, PNorm = 48.7876, GNorm = 2.5149, lr_0 = 5.5787e-04
Loss = 2.7197e-01, PNorm = 48.8017, GNorm = 1.1568, lr_0 = 5.6163e-04
Loss = 3.0232e-01, PNorm = 48.8125, GNorm = 0.9572, lr_0 = 5.6538e-04
Loss = 2.7570e-01, PNorm = 48.8243, GNorm = 6.0472, lr_0 = 5.6913e-04
Loss = 2.7302e-01, PNorm = 48.8409, GNorm = 3.6793, lr_0 = 5.7288e-04
Loss = 2.3506e-01, PNorm = 48.8465, GNorm = 1.3726, lr_0 = 5.7662e-04
Loss = 2.5253e-01, PNorm = 48.8560, GNorm = 1.3222, lr_0 = 5.8038e-04
Loss = 2.8384e-01, PNorm = 48.8638, GNorm = 8.3682, lr_0 = 5.8413e-04
Loss = 2.6573e-01, PNorm = 48.8791, GNorm = 5.5446, lr_0 = 5.8788e-04
Loss = 2.6378e-01, PNorm = 48.8936, GNorm = 6.0181, lr_0 = 5.9163e-04
Loss = 2.8914e-01, PNorm = 48.9068, GNorm = 1.2356, lr_0 = 5.9538e-04
Loss = 2.6126e-01, PNorm = 48.9200, GNorm = 3.9706, lr_0 = 5.9913e-04
Loss = 3.1608e-01, PNorm = 48.9354, GNorm = 1.1116, lr_0 = 6.0288e-04
Loss = 3.1444e-01, PNorm = 48.9603, GNorm = 3.3384, lr_0 = 6.0663e-04
Loss = 2.6935e-01, PNorm = 48.9808, GNorm = 1.7644, lr_0 = 6.1038e-04
Loss = 2.6999e-01, PNorm = 49.0025, GNorm = 1.6303, lr_0 = 6.1413e-04
Loss = 2.8762e-01, PNorm = 49.0139, GNorm = 0.7920, lr_0 = 6.1788e-04
Loss = 3.0724e-01, PNorm = 49.0339, GNorm = 2.1406, lr_0 = 6.2163e-04
Loss = 2.9486e-01, PNorm = 49.0458, GNorm = 1.2111, lr_0 = 6.2538e-04
Loss = 2.4782e-01, PNorm = 49.0584, GNorm = 2.4720, lr_0 = 6.2913e-04
Loss = 2.8617e-01, PNorm = 49.0696, GNorm = 3.5316, lr_0 = 6.3288e-04
Loss = 2.5927e-01, PNorm = 49.0876, GNorm = 3.5071, lr_0 = 6.3663e-04
Loss = 2.3762e-01, PNorm = 49.1022, GNorm = 5.7235, lr_0 = 6.4038e-04
Loss = 2.6546e-01, PNorm = 49.1148, GNorm = 4.5348, lr_0 = 6.4413e-04
Loss = 2.7130e-01, PNorm = 49.1307, GNorm = 10.2038, lr_0 = 6.4788e-04
Loss = 2.6124e-01, PNorm = 49.1487, GNorm = 4.4230, lr_0 = 6.5163e-04
Loss = 2.6028e-01, PNorm = 49.1643, GNorm = 1.3229, lr_0 = 6.5538e-04
Loss = 2.5823e-01, PNorm = 49.1738, GNorm = 0.9608, lr_0 = 6.5913e-04
Loss = 2.4010e-01, PNorm = 49.1831, GNorm = 2.0678, lr_0 = 6.6288e-04
Loss = 2.5112e-01, PNorm = 49.1936, GNorm = 4.6558, lr_0 = 6.6663e-04
Loss = 2.8186e-01, PNorm = 49.2039, GNorm = 5.9528, lr_0 = 6.7038e-04
Loss = 2.8932e-01, PNorm = 49.2196, GNorm = 2.4398, lr_0 = 6.7413e-04
Loss = 2.9308e-01, PNorm = 49.2401, GNorm = 0.8669, lr_0 = 6.7788e-04
Loss = 2.5768e-01, PNorm = 49.2550, GNorm = 1.2809, lr_0 = 6.8163e-04
Loss = 2.4698e-01, PNorm = 49.2637, GNorm = 7.6617, lr_0 = 6.8538e-04
Loss = 2.8188e-01, PNorm = 49.2830, GNorm = 4.0705, lr_0 = 6.8913e-04
Loss = 2.6395e-01, PNorm = 49.3037, GNorm = 1.3539, lr_0 = 6.9288e-04
Loss = 3.2443e-01, PNorm = 49.3173, GNorm = 1.4392, lr_0 = 6.9663e-04
Loss = 2.6197e-01, PNorm = 49.3335, GNorm = 2.3579, lr_0 = 7.0038e-04
Loss = 2.8215e-01, PNorm = 49.3524, GNorm = 3.7827, lr_0 = 7.0413e-04
Loss = 2.8524e-01, PNorm = 49.3679, GNorm = 2.9906, lr_0 = 7.0788e-04
Loss = 2.6370e-01, PNorm = 49.3927, GNorm = 6.8854, lr_0 = 7.1163e-04
Loss = 2.8326e-01, PNorm = 49.4050, GNorm = 5.3859, lr_0 = 7.1538e-04
Loss = 2.5642e-01, PNorm = 49.4177, GNorm = 1.9595, lr_0 = 7.1913e-04
Loss = 3.0417e-01, PNorm = 49.4344, GNorm = 6.2185, lr_0 = 7.2288e-04
Loss = 2.4102e-01, PNorm = 49.4439, GNorm = 0.9881, lr_0 = 7.2663e-04
Loss = 3.2201e-01, PNorm = 49.4665, GNorm = 6.3413, lr_0 = 7.3038e-04
Loss = 3.4302e-01, PNorm = 49.4914, GNorm = 3.6866, lr_0 = 7.3413e-04
Loss = 2.8811e-01, PNorm = 49.5165, GNorm = 1.7094, lr_0 = 7.3788e-04
Loss = 2.3894e-01, PNorm = 49.5352, GNorm = 2.3535, lr_0 = 7.4163e-04
Loss = 2.4516e-01, PNorm = 49.5547, GNorm = 5.7958, lr_0 = 7.4538e-04
Loss = 2.7167e-01, PNorm = 49.5695, GNorm = 4.8083, lr_0 = 7.4913e-04
Loss = 2.7073e-01, PNorm = 49.5861, GNorm = 1.4864, lr_0 = 7.5288e-04
Loss = 2.6727e-01, PNorm = 49.6076, GNorm = 1.8590, lr_0 = 7.5663e-04
Loss = 2.2987e-01, PNorm = 49.6307, GNorm = 1.9479, lr_0 = 7.6038e-04
Loss = 2.2901e-01, PNorm = 49.6498, GNorm = 2.5351, lr_0 = 7.6413e-04
Loss = 2.5157e-01, PNorm = 49.6651, GNorm = 1.1921, lr_0 = 7.6788e-04
Loss = 2.6065e-01, PNorm = 49.6865, GNorm = 1.2516, lr_0 = 7.7163e-04
Loss = 2.7384e-01, PNorm = 49.7085, GNorm = 1.7814, lr_0 = 7.7538e-04
Loss = 2.7168e-01, PNorm = 49.7194, GNorm = 0.9341, lr_0 = 7.7913e-04
Loss = 2.7380e-01, PNorm = 49.7342, GNorm = 2.8851, lr_0 = 7.8288e-04
Loss = 2.6094e-01, PNorm = 49.7496, GNorm = 1.4716, lr_0 = 7.8663e-04
Loss = 2.9870e-01, PNorm = 49.7773, GNorm = 1.0808, lr_0 = 7.9038e-04
Loss = 2.5299e-01, PNorm = 49.7959, GNorm = 3.4002, lr_0 = 7.9413e-04
Loss = 2.6494e-01, PNorm = 49.8057, GNorm = 1.8520, lr_0 = 7.9788e-04
Loss = 2.9469e-01, PNorm = 49.8284, GNorm = 8.9794, lr_0 = 8.0163e-04
Loss = 2.9786e-01, PNorm = 49.8409, GNorm = 1.9836, lr_0 = 8.0538e-04
Loss = 3.0607e-01, PNorm = 49.8566, GNorm = 1.0223, lr_0 = 8.0913e-04
Loss = 2.6119e-01, PNorm = 49.8759, GNorm = 3.5724, lr_0 = 8.1288e-04
Loss = 2.3580e-01, PNorm = 49.8898, GNorm = 0.7988, lr_0 = 8.1663e-04
Loss = 2.4594e-01, PNorm = 49.9047, GNorm = 2.1357, lr_0 = 8.2038e-04
Loss = 2.5680e-01, PNorm = 49.9167, GNorm = 2.4743, lr_0 = 8.2413e-04
Loss = 2.1877e-01, PNorm = 49.9366, GNorm = 3.2006, lr_0 = 8.2788e-04
Loss = 2.8550e-01, PNorm = 49.9577, GNorm = 2.9717, lr_0 = 8.3163e-04
Loss = 2.5033e-01, PNorm = 49.9827, GNorm = 2.0455, lr_0 = 8.3538e-04
Loss = 2.4956e-01, PNorm = 50.0057, GNorm = 1.9395, lr_0 = 8.3913e-04
Loss = 2.4858e-01, PNorm = 50.0247, GNorm = 3.8099, lr_0 = 8.4288e-04
Loss = 2.6612e-01, PNorm = 50.0434, GNorm = 1.1641, lr_0 = 8.4663e-04
Loss = 2.9036e-01, PNorm = 50.0669, GNorm = 2.5160, lr_0 = 8.5038e-04
Loss = 2.7396e-01, PNorm = 50.1025, GNorm = 1.5266, lr_0 = 8.5413e-04
Loss = 2.4325e-01, PNorm = 50.1299, GNorm = 2.1618, lr_0 = 8.5788e-04
Loss = 2.4575e-01, PNorm = 50.1532, GNorm = 2.6460, lr_0 = 8.6163e-04
Loss = 2.6468e-01, PNorm = 50.1800, GNorm = 2.4901, lr_0 = 8.6538e-04
Loss = 2.5534e-01, PNorm = 50.1957, GNorm = 1.6870, lr_0 = 8.6913e-04
Loss = 2.5253e-01, PNorm = 50.2226, GNorm = 2.9395, lr_0 = 8.7288e-04
Loss = 2.9950e-01, PNorm = 50.2481, GNorm = 1.0779, lr_0 = 8.7663e-04
Loss = 2.4562e-01, PNorm = 50.2755, GNorm = 4.4474, lr_0 = 8.8038e-04
Loss = 2.7808e-01, PNorm = 50.3032, GNorm = 2.3872, lr_0 = 8.8413e-04
Loss = 3.2965e-01, PNorm = 50.3319, GNorm = 4.8173, lr_0 = 8.8788e-04
Loss = 2.8000e-01, PNorm = 50.3676, GNorm = 1.4660, lr_0 = 8.9163e-04
Loss = 2.7887e-01, PNorm = 50.3935, GNorm = 4.1786, lr_0 = 8.9538e-04
Loss = 2.7079e-01, PNorm = 50.4328, GNorm = 1.4276, lr_0 = 8.9913e-04
Loss = 2.6382e-01, PNorm = 50.4431, GNorm = 2.7159, lr_0 = 9.0288e-04
Loss = 2.6201e-01, PNorm = 50.4665, GNorm = 2.9114, lr_0 = 9.0663e-04
Loss = 2.3841e-01, PNorm = 50.4965, GNorm = 1.0334, lr_0 = 9.1038e-04
Loss = 2.4932e-01, PNorm = 50.5024, GNorm = 4.3385, lr_0 = 9.1413e-04
Loss = 2.8338e-01, PNorm = 50.5282, GNorm = 7.1955, lr_0 = 9.1788e-04
Loss = 2.7904e-01, PNorm = 50.5443, GNorm = 1.3329, lr_0 = 9.2163e-04
Loss = 2.9057e-01, PNorm = 50.5569, GNorm = 2.5157, lr_0 = 9.2538e-04
Loss = 2.5728e-01, PNorm = 50.5836, GNorm = 3.3090, lr_0 = 9.2913e-04
Loss = 2.4495e-01, PNorm = 50.5973, GNorm = 4.8522, lr_0 = 9.3288e-04
Loss = 2.7110e-01, PNorm = 50.6253, GNorm = 5.1712, lr_0 = 9.3663e-04
Loss = 2.8181e-01, PNorm = 50.6464, GNorm = 4.6955, lr_0 = 9.4038e-04
Loss = 2.5427e-01, PNorm = 50.6732, GNorm = 2.0894, lr_0 = 9.4413e-04
Loss = 2.4187e-01, PNorm = 50.6900, GNorm = 0.9846, lr_0 = 9.4788e-04
Loss = 2.9031e-01, PNorm = 50.7116, GNorm = 1.3155, lr_0 = 9.5163e-04
Loss = 3.0552e-01, PNorm = 50.7397, GNorm = 1.4244, lr_0 = 9.5538e-04
Loss = 2.6494e-01, PNorm = 50.7719, GNorm = 0.7232, lr_0 = 9.5913e-04
Loss = 2.5606e-01, PNorm = 50.7961, GNorm = 2.7809, lr_0 = 9.6288e-04
Loss = 2.6215e-01, PNorm = 50.8238, GNorm = 2.5521, lr_0 = 9.6663e-04
Loss = 2.8095e-01, PNorm = 50.8477, GNorm = 0.7949, lr_0 = 9.7038e-04
Loss = 2.3328e-01, PNorm = 50.8623, GNorm = 0.9497, lr_0 = 9.7413e-04
Loss = 2.6389e-01, PNorm = 50.8899, GNorm = 1.2152, lr_0 = 9.7788e-04
Loss = 2.7813e-01, PNorm = 50.9012, GNorm = 4.9734, lr_0 = 9.8163e-04
Loss = 2.3843e-01, PNorm = 50.9206, GNorm = 2.2984, lr_0 = 9.8537e-04
Loss = 2.5192e-01, PNorm = 50.9445, GNorm = 4.6803, lr_0 = 9.8912e-04
Loss = 2.4272e-01, PNorm = 50.9726, GNorm = 2.6492, lr_0 = 9.9288e-04
Loss = 2.6038e-01, PNorm = 50.9977, GNorm = 0.8779, lr_0 = 9.9663e-04
Loss = 2.2354e-01, PNorm = 51.0188, GNorm = 2.5949, lr_0 = 9.9993e-04
Validation mae = 0.295873
Epoch 2
Loss = 2.1135e-01, PNorm = 51.0403, GNorm = 2.1180, lr_0 = 9.9925e-04
Loss = 2.2143e-01, PNorm = 51.0612, GNorm = 1.5574, lr_0 = 9.9856e-04
Loss = 2.6420e-01, PNorm = 51.0851, GNorm = 1.4971, lr_0 = 9.9788e-04
Loss = 2.4582e-01, PNorm = 51.1156, GNorm = 2.3360, lr_0 = 9.9719e-04
Loss = 2.3074e-01, PNorm = 51.1278, GNorm = 2.1357, lr_0 = 9.9651e-04
Loss = 2.1959e-01, PNorm = 51.1426, GNorm = 3.6576, lr_0 = 9.9583e-04
Loss = 2.5603e-01, PNorm = 51.1551, GNorm = 2.5713, lr_0 = 9.9515e-04
Loss = 2.2402e-01, PNorm = 51.1809, GNorm = 1.4484, lr_0 = 9.9446e-04
Loss = 2.6476e-01, PNorm = 51.1998, GNorm = 0.8396, lr_0 = 9.9378e-04
Loss = 2.4346e-01, PNorm = 51.2217, GNorm = 4.0185, lr_0 = 9.9310e-04
Loss = 2.4780e-01, PNorm = 51.2431, GNorm = 2.2059, lr_0 = 9.9242e-04
Loss = 2.5199e-01, PNorm = 51.2744, GNorm = 3.3084, lr_0 = 9.9174e-04
Loss = 2.4375e-01, PNorm = 51.3054, GNorm = 1.3734, lr_0 = 9.9106e-04
Loss = 2.2513e-01, PNorm = 51.3336, GNorm = 1.7174, lr_0 = 9.9038e-04
Loss = 2.5448e-01, PNorm = 51.3558, GNorm = 1.1678, lr_0 = 9.8971e-04
Loss = 2.5285e-01, PNorm = 51.3789, GNorm = 3.4970, lr_0 = 9.8903e-04
Loss = 2.3257e-01, PNorm = 51.4017, GNorm = 1.6655, lr_0 = 9.8835e-04
Loss = 2.5378e-01, PNorm = 51.4226, GNorm = 3.2343, lr_0 = 9.8767e-04
Loss = 2.0973e-01, PNorm = 51.4460, GNorm = 1.0950, lr_0 = 9.8700e-04
Loss = 2.5460e-01, PNorm = 51.4748, GNorm = 3.4589, lr_0 = 9.8632e-04
Loss = 2.2286e-01, PNorm = 51.4952, GNorm = 2.5209, lr_0 = 9.8564e-04
Loss = 2.3884e-01, PNorm = 51.5206, GNorm = 2.4306, lr_0 = 9.8497e-04
Loss = 2.3184e-01, PNorm = 51.5385, GNorm = 1.9323, lr_0 = 9.8429e-04
Loss = 2.3626e-01, PNorm = 51.5566, GNorm = 0.8055, lr_0 = 9.8362e-04
Loss = 2.3220e-01, PNorm = 51.5771, GNorm = 2.4966, lr_0 = 9.8295e-04
Loss = 3.2007e-01, PNorm = 51.6136, GNorm = 5.3893, lr_0 = 9.8227e-04
Loss = 2.8437e-01, PNorm = 51.6508, GNorm = 1.4512, lr_0 = 9.8160e-04
Loss = 2.3531e-01, PNorm = 51.6828, GNorm = 2.0549, lr_0 = 9.8093e-04
Loss = 2.3186e-01, PNorm = 51.7046, GNorm = 1.0578, lr_0 = 9.8026e-04
Loss = 2.0958e-01, PNorm = 51.7340, GNorm = 1.7338, lr_0 = 9.7958e-04
Loss = 2.2041e-01, PNorm = 51.7525, GNorm = 2.2230, lr_0 = 9.7891e-04
Loss = 2.3282e-01, PNorm = 51.7802, GNorm = 1.8174, lr_0 = 9.7824e-04
Loss = 2.1540e-01, PNorm = 51.8016, GNorm = 1.3929, lr_0 = 9.7757e-04
Loss = 2.2784e-01, PNorm = 51.8178, GNorm = 3.1392, lr_0 = 9.7690e-04
Loss = 2.7583e-01, PNorm = 51.8441, GNorm = 2.8465, lr_0 = 9.7623e-04
Loss = 2.3132e-01, PNorm = 51.8867, GNorm = 2.5065, lr_0 = 9.7556e-04
Loss = 2.6092e-01, PNorm = 51.9209, GNorm = 2.8061, lr_0 = 9.7490e-04
Loss = 2.4514e-01, PNorm = 51.9559, GNorm = 0.8658, lr_0 = 9.7423e-04
Loss = 2.4546e-01, PNorm = 51.9923, GNorm = 2.6263, lr_0 = 9.7356e-04
Loss = 2.2645e-01, PNorm = 52.0087, GNorm = 2.3260, lr_0 = 9.7289e-04
Loss = 2.1784e-01, PNorm = 52.0269, GNorm = 1.0229, lr_0 = 9.7223e-04
Loss = 2.4770e-01, PNorm = 52.0505, GNorm = 4.0711, lr_0 = 9.7156e-04
Loss = 2.7700e-01, PNorm = 52.0828, GNorm = 2.9581, lr_0 = 9.7090e-04
Loss = 2.2876e-01, PNorm = 52.1158, GNorm = 2.3418, lr_0 = 9.7023e-04
Loss = 2.3496e-01, PNorm = 52.1493, GNorm = 2.2290, lr_0 = 9.6957e-04
Loss = 2.3157e-01, PNorm = 52.1692, GNorm = 2.5237, lr_0 = 9.6890e-04
Loss = 2.1249e-01, PNorm = 52.1939, GNorm = 2.4572, lr_0 = 9.6824e-04
Loss = 2.3235e-01, PNorm = 52.2203, GNorm = 3.8118, lr_0 = 9.6757e-04
Loss = 2.8549e-01, PNorm = 52.2471, GNorm = 1.4342, lr_0 = 9.6691e-04
Loss = 2.3872e-01, PNorm = 52.2834, GNorm = 0.9005, lr_0 = 9.6625e-04
Loss = 2.2194e-01, PNorm = 52.3124, GNorm = 1.4929, lr_0 = 9.6559e-04
Loss = 2.9819e-01, PNorm = 52.3361, GNorm = 1.7157, lr_0 = 9.6493e-04
Loss = 2.5245e-01, PNorm = 52.3729, GNorm = 1.7999, lr_0 = 9.6427e-04
Loss = 2.0968e-01, PNorm = 52.3912, GNorm = 0.8713, lr_0 = 9.6360e-04
Loss = 2.2581e-01, PNorm = 52.4079, GNorm = 3.8654, lr_0 = 9.6294e-04
Loss = 2.1596e-01, PNorm = 52.4267, GNorm = 1.3759, lr_0 = 9.6228e-04
Loss = 2.2933e-01, PNorm = 52.4545, GNorm = 3.7367, lr_0 = 9.6163e-04
Loss = 2.1114e-01, PNorm = 52.4778, GNorm = 0.7090, lr_0 = 9.6097e-04
Loss = 1.9609e-01, PNorm = 52.5009, GNorm = 1.8578, lr_0 = 9.6031e-04
Loss = 1.8720e-01, PNorm = 52.5172, GNorm = 1.6157, lr_0 = 9.5965e-04
Loss = 2.2634e-01, PNorm = 52.5361, GNorm = 1.0272, lr_0 = 9.5899e-04
Loss = 2.1920e-01, PNorm = 52.5618, GNorm = 0.9351, lr_0 = 9.5834e-04
Loss = 2.3381e-01, PNorm = 52.5824, GNorm = 1.3273, lr_0 = 9.5768e-04
Loss = 2.1974e-01, PNorm = 52.6072, GNorm = 0.8093, lr_0 = 9.5702e-04
Loss = 2.1734e-01, PNorm = 52.6218, GNorm = 3.6430, lr_0 = 9.5637e-04
Loss = 2.3904e-01, PNorm = 52.6550, GNorm = 2.3751, lr_0 = 9.5571e-04
Loss = 2.3005e-01, PNorm = 52.6721, GNorm = 0.8150, lr_0 = 9.5506e-04
Loss = 2.0921e-01, PNorm = 52.6976, GNorm = 1.5504, lr_0 = 9.5440e-04
Loss = 2.2380e-01, PNorm = 52.7286, GNorm = 0.8467, lr_0 = 9.5375e-04
Loss = 2.2151e-01, PNorm = 52.7491, GNorm = 2.7757, lr_0 = 9.5310e-04
Loss = 1.9492e-01, PNorm = 52.7641, GNorm = 0.8942, lr_0 = 9.5244e-04
Loss = 2.3460e-01, PNorm = 52.7869, GNorm = 1.2318, lr_0 = 9.5179e-04
Loss = 2.3087e-01, PNorm = 52.8108, GNorm = 1.9744, lr_0 = 9.5114e-04
Loss = 2.5325e-01, PNorm = 52.8433, GNorm = 0.7148, lr_0 = 9.5049e-04
Loss = 2.2212e-01, PNorm = 52.8768, GNorm = 3.2780, lr_0 = 9.4984e-04
Loss = 2.0882e-01, PNorm = 52.8929, GNorm = 1.0630, lr_0 = 9.4919e-04
Loss = 1.9875e-01, PNorm = 52.9119, GNorm = 2.5507, lr_0 = 9.4854e-04
Loss = 2.3373e-01, PNorm = 52.9339, GNorm = 2.3060, lr_0 = 9.4789e-04
Loss = 2.3179e-01, PNorm = 52.9655, GNorm = 0.7649, lr_0 = 9.4724e-04
Loss = 2.3975e-01, PNorm = 52.9947, GNorm = 2.1779, lr_0 = 9.4659e-04
Loss = 2.5204e-01, PNorm = 53.0157, GNorm = 1.9660, lr_0 = 9.4594e-04
Loss = 2.5566e-01, PNorm = 53.0437, GNorm = 2.1904, lr_0 = 9.4529e-04
Loss = 2.2546e-01, PNorm = 53.0695, GNorm = 1.3417, lr_0 = 9.4464e-04
Loss = 2.2451e-01, PNorm = 53.0895, GNorm = 0.8565, lr_0 = 9.4400e-04
Loss = 2.1607e-01, PNorm = 53.1106, GNorm = 0.8227, lr_0 = 9.4335e-04
Loss = 2.3773e-01, PNorm = 53.1356, GNorm = 2.9895, lr_0 = 9.4270e-04
Loss = 1.9547e-01, PNorm = 53.1623, GNorm = 0.5136, lr_0 = 9.4206e-04
Loss = 2.1952e-01, PNorm = 53.1889, GNorm = 1.1017, lr_0 = 9.4141e-04
Loss = 2.2111e-01, PNorm = 53.2129, GNorm = 1.3546, lr_0 = 9.4077e-04
Loss = 2.1925e-01, PNorm = 53.2335, GNorm = 5.4683, lr_0 = 9.4012e-04
Loss = 2.2860e-01, PNorm = 53.2592, GNorm = 2.3578, lr_0 = 9.3948e-04
Loss = 2.2181e-01, PNorm = 53.2810, GNorm = 1.5093, lr_0 = 9.3884e-04
Loss = 2.4281e-01, PNorm = 53.3025, GNorm = 3.5932, lr_0 = 9.3819e-04
Loss = 2.3539e-01, PNorm = 53.3248, GNorm = 2.2170, lr_0 = 9.3755e-04
Loss = 2.0821e-01, PNorm = 53.3584, GNorm = 1.0619, lr_0 = 9.3691e-04
Loss = 2.1366e-01, PNorm = 53.3885, GNorm = 0.8308, lr_0 = 9.3627e-04
Loss = 1.9622e-01, PNorm = 53.4184, GNorm = 2.0497, lr_0 = 9.3562e-04
Loss = 2.1500e-01, PNorm = 53.4380, GNorm = 0.9209, lr_0 = 9.3498e-04
Loss = 2.1644e-01, PNorm = 53.4584, GNorm = 2.4896, lr_0 = 9.3434e-04
Loss = 2.1607e-01, PNorm = 53.4801, GNorm = 0.7126, lr_0 = 9.3370e-04
Loss = 2.2343e-01, PNorm = 53.5057, GNorm = 1.7956, lr_0 = 9.3306e-04
Loss = 1.9360e-01, PNorm = 53.5360, GNorm = 0.8735, lr_0 = 9.3242e-04
Loss = 2.2117e-01, PNorm = 53.5625, GNorm = 1.9105, lr_0 = 9.3178e-04
Loss = 2.2759e-01, PNorm = 53.5892, GNorm = 1.0159, lr_0 = 9.3115e-04
Loss = 2.0343e-01, PNorm = 53.6145, GNorm = 1.2856, lr_0 = 9.3051e-04
Loss = 2.5046e-01, PNorm = 53.6330, GNorm = 2.8870, lr_0 = 9.2987e-04
Loss = 2.2044e-01, PNorm = 53.6595, GNorm = 1.5802, lr_0 = 9.2923e-04
Loss = 2.1845e-01, PNorm = 53.6830, GNorm = 2.3712, lr_0 = 9.2860e-04
Loss = 2.4298e-01, PNorm = 53.7068, GNorm = 1.6252, lr_0 = 9.2796e-04
Loss = 2.1974e-01, PNorm = 53.7311, GNorm = 1.4855, lr_0 = 9.2733e-04
Loss = 2.0586e-01, PNorm = 53.7537, GNorm = 2.0454, lr_0 = 9.2669e-04
Loss = 2.1508e-01, PNorm = 53.7758, GNorm = 1.6883, lr_0 = 9.2606e-04
Loss = 2.1470e-01, PNorm = 53.8082, GNorm = 0.9094, lr_0 = 9.2542e-04
Loss = 1.9486e-01, PNorm = 53.8353, GNorm = 0.9812, lr_0 = 9.2479e-04
Loss = 2.1754e-01, PNorm = 53.8572, GNorm = 1.7115, lr_0 = 9.2415e-04
Loss = 2.3532e-01, PNorm = 53.8835, GNorm = 2.5671, lr_0 = 9.2352e-04
Loss = 2.2911e-01, PNorm = 53.9150, GNorm = 1.9999, lr_0 = 9.2289e-04
Loss = 2.1012e-01, PNorm = 53.9450, GNorm = 1.1804, lr_0 = 9.2226e-04
Loss = 2.3657e-01, PNorm = 53.9770, GNorm = 3.9481, lr_0 = 9.2162e-04
Loss = 2.8135e-01, PNorm = 54.0125, GNorm = 1.4399, lr_0 = 9.2099e-04
Validation mae = 0.295259
Epoch 3
Loss = 1.9420e-01, PNorm = 54.0547, GNorm = 1.4281, lr_0 = 9.2036e-04
Loss = 1.9751e-01, PNorm = 54.0813, GNorm = 1.9801, lr_0 = 9.1973e-04
Loss = 2.0740e-01, PNorm = 54.1079, GNorm = 0.7561, lr_0 = 9.1910e-04
Loss = 1.8582e-01, PNorm = 54.1351, GNorm = 1.7355, lr_0 = 9.1847e-04
Loss = 2.0456e-01, PNorm = 54.1640, GNorm = 0.9919, lr_0 = 9.1784e-04
Loss = 1.7537e-01, PNorm = 54.1860, GNorm = 0.9103, lr_0 = 9.1721e-04
Loss = 2.1301e-01, PNorm = 54.2073, GNorm = 1.5936, lr_0 = 9.1658e-04
Loss = 2.0620e-01, PNorm = 54.2365, GNorm = 1.5034, lr_0 = 9.1596e-04
Loss = 2.2462e-01, PNorm = 54.2594, GNorm = 2.0367, lr_0 = 9.1533e-04
Loss = 2.4829e-01, PNorm = 54.2780, GNorm = 1.6274, lr_0 = 9.1470e-04
Loss = 2.1803e-01, PNorm = 54.3085, GNorm = 1.8828, lr_0 = 9.1408e-04
Loss = 1.8394e-01, PNorm = 54.3274, GNorm = 1.1917, lr_0 = 9.1345e-04
Loss = 2.0581e-01, PNorm = 54.3515, GNorm = 1.1525, lr_0 = 9.1282e-04
Loss = 2.1538e-01, PNorm = 54.3799, GNorm = 1.5142, lr_0 = 9.1220e-04
Loss = 1.9373e-01, PNorm = 54.4009, GNorm = 0.8615, lr_0 = 9.1157e-04
Loss = 2.3474e-01, PNorm = 54.4318, GNorm = 0.7737, lr_0 = 9.1095e-04
Loss = 1.9846e-01, PNorm = 54.4614, GNorm = 1.7629, lr_0 = 9.1032e-04
Loss = 2.1791e-01, PNorm = 54.4825, GNorm = 2.5740, lr_0 = 9.0970e-04
Loss = 2.2483e-01, PNorm = 54.5186, GNorm = 0.7896, lr_0 = 9.0908e-04
Loss = 1.7240e-01, PNorm = 54.5508, GNorm = 0.7926, lr_0 = 9.0846e-04
Loss = 1.9816e-01, PNorm = 54.5740, GNorm = 0.7776, lr_0 = 9.0783e-04
Loss = 2.1509e-01, PNorm = 54.5968, GNorm = 2.2350, lr_0 = 9.0721e-04
Loss = 2.0442e-01, PNorm = 54.6232, GNorm = 1.2502, lr_0 = 9.0659e-04
Loss = 2.2823e-01, PNorm = 54.6450, GNorm = 1.5227, lr_0 = 9.0597e-04
Loss = 1.8389e-01, PNorm = 54.6648, GNorm = 1.6787, lr_0 = 9.0535e-04
Loss = 2.1709e-01, PNorm = 54.6841, GNorm = 0.8684, lr_0 = 9.0473e-04
Loss = 1.9869e-01, PNorm = 54.7102, GNorm = 1.7420, lr_0 = 9.0411e-04
Loss = 1.8155e-01, PNorm = 54.7358, GNorm = 2.0471, lr_0 = 9.0349e-04
Loss = 2.0090e-01, PNorm = 54.7607, GNorm = 1.1011, lr_0 = 9.0287e-04
Loss = 2.0172e-01, PNorm = 54.7857, GNorm = 1.5252, lr_0 = 9.0225e-04
Loss = 2.1776e-01, PNorm = 54.8083, GNorm = 1.2877, lr_0 = 9.0163e-04
Loss = 2.1959e-01, PNorm = 54.8405, GNorm = 2.0053, lr_0 = 9.0102e-04
Loss = 2.0606e-01, PNorm = 54.8703, GNorm = 0.6976, lr_0 = 9.0040e-04
Loss = 2.3462e-01, PNorm = 54.9037, GNorm = 0.7006, lr_0 = 8.9978e-04
Loss = 2.3170e-01, PNorm = 54.9378, GNorm = 0.7139, lr_0 = 8.9916e-04
Loss = 2.1131e-01, PNorm = 54.9709, GNorm = 2.4723, lr_0 = 8.9855e-04
Loss = 2.0638e-01, PNorm = 54.9967, GNorm = 0.7365, lr_0 = 8.9793e-04
Loss = 1.8797e-01, PNorm = 55.0271, GNorm = 1.3395, lr_0 = 8.9732e-04
Loss = 2.0063e-01, PNorm = 55.0484, GNorm = 1.4247, lr_0 = 8.9670e-04
Loss = 2.1788e-01, PNorm = 55.0723, GNorm = 1.8007, lr_0 = 8.9609e-04
Loss = 1.9438e-01, PNorm = 55.1002, GNorm = 2.6173, lr_0 = 8.9548e-04
Loss = 2.0452e-01, PNorm = 55.1221, GNorm = 1.9616, lr_0 = 8.9486e-04
Loss = 1.9603e-01, PNorm = 55.1494, GNorm = 2.8222, lr_0 = 8.9425e-04
Loss = 2.5512e-01, PNorm = 55.1874, GNorm = 2.4060, lr_0 = 8.9364e-04
Loss = 2.8918e-01, PNorm = 55.2244, GNorm = 3.0064, lr_0 = 8.9302e-04
Loss = 2.3288e-01, PNorm = 55.2710, GNorm = 3.3123, lr_0 = 8.9241e-04
Loss = 2.2691e-01, PNorm = 55.2992, GNorm = 1.5409, lr_0 = 8.9180e-04
Loss = 2.3575e-01, PNorm = 55.3272, GNorm = 1.0469, lr_0 = 8.9119e-04
Loss = 2.0388e-01, PNorm = 55.3520, GNorm = 1.1285, lr_0 = 8.9058e-04
Loss = 2.1917e-01, PNorm = 55.3736, GNorm = 0.8403, lr_0 = 8.8997e-04
Loss = 2.0090e-01, PNorm = 55.3904, GNorm = 1.7254, lr_0 = 8.8936e-04
Loss = 1.9141e-01, PNorm = 55.4182, GNorm = 1.6266, lr_0 = 8.8875e-04
Loss = 1.9681e-01, PNorm = 55.4435, GNorm = 0.9119, lr_0 = 8.8814e-04
Loss = 2.1982e-01, PNorm = 55.4696, GNorm = 0.5720, lr_0 = 8.8753e-04
Loss = 2.2079e-01, PNorm = 55.4934, GNorm = 0.7266, lr_0 = 8.8693e-04
Loss = 2.1395e-01, PNorm = 55.5161, GNorm = 2.6015, lr_0 = 8.8632e-04
Loss = 1.9522e-01, PNorm = 55.5443, GNorm = 0.8485, lr_0 = 8.8571e-04
Loss = 2.3079e-01, PNorm = 55.5731, GNorm = 1.1530, lr_0 = 8.8510e-04
Loss = 2.1069e-01, PNorm = 55.5966, GNorm = 0.8209, lr_0 = 8.8450e-04
Loss = 2.2530e-01, PNorm = 55.6184, GNorm = 1.1071, lr_0 = 8.8389e-04
Loss = 2.1079e-01, PNorm = 55.6438, GNorm = 0.7888, lr_0 = 8.8329e-04
Loss = 2.2916e-01, PNorm = 55.6654, GNorm = 2.2259, lr_0 = 8.8268e-04
Loss = 2.1176e-01, PNorm = 55.6961, GNorm = 3.7853, lr_0 = 8.8208e-04
Loss = 2.1466e-01, PNorm = 55.7210, GNorm = 0.7029, lr_0 = 8.8147e-04
Loss = 1.9908e-01, PNorm = 55.7464, GNorm = 1.5425, lr_0 = 8.8087e-04
Loss = 2.0459e-01, PNorm = 55.7748, GNorm = 0.7005, lr_0 = 8.8026e-04
Loss = 1.9277e-01, PNorm = 55.7923, GNorm = 0.6643, lr_0 = 8.7966e-04
Loss = 1.7049e-01, PNorm = 55.8123, GNorm = 0.8671, lr_0 = 8.7906e-04
Loss = 1.8563e-01, PNorm = 55.8313, GNorm = 1.8161, lr_0 = 8.7846e-04
Loss = 2.3086e-01, PNorm = 55.8542, GNorm = 0.8025, lr_0 = 8.7785e-04
Loss = 2.2540e-01, PNorm = 55.8854, GNorm = 1.4811, lr_0 = 8.7725e-04
Loss = 2.1452e-01, PNorm = 55.9087, GNorm = 0.8675, lr_0 = 8.7665e-04
Loss = 2.1576e-01, PNorm = 55.9345, GNorm = 1.1085, lr_0 = 8.7605e-04
Loss = 1.9641e-01, PNorm = 55.9636, GNorm = 0.4870, lr_0 = 8.7545e-04
Loss = 2.0707e-01, PNorm = 55.9883, GNorm = 2.2789, lr_0 = 8.7485e-04
Loss = 2.0350e-01, PNorm = 56.0158, GNorm = 2.8960, lr_0 = 8.7425e-04
Loss = 1.9129e-01, PNorm = 56.0379, GNorm = 0.8234, lr_0 = 8.7365e-04
Loss = 1.9177e-01, PNorm = 56.0558, GNorm = 0.9344, lr_0 = 8.7306e-04
Loss = 1.6809e-01, PNorm = 56.0758, GNorm = 1.0990, lr_0 = 8.7246e-04
Loss = 2.2172e-01, PNorm = 56.0912, GNorm = 2.6947, lr_0 = 8.7186e-04
Loss = 2.2522e-01, PNorm = 56.1134, GNorm = 2.3871, lr_0 = 8.7126e-04
Loss = 2.1583e-01, PNorm = 56.1409, GNorm = 0.8532, lr_0 = 8.7067e-04
Loss = 2.1601e-01, PNorm = 56.1717, GNorm = 0.8223, lr_0 = 8.7007e-04
Loss = 2.0378e-01, PNorm = 56.2047, GNorm = 1.6461, lr_0 = 8.6947e-04
Loss = 1.9422e-01, PNorm = 56.2308, GNorm = 1.0003, lr_0 = 8.6888e-04
Loss = 1.8631e-01, PNorm = 56.2556, GNorm = 0.6622, lr_0 = 8.6828e-04
Loss = 1.8370e-01, PNorm = 56.2734, GNorm = 0.6682, lr_0 = 8.6769e-04
Loss = 2.1830e-01, PNorm = 56.2955, GNorm = 1.2803, lr_0 = 8.6709e-04
Loss = 2.1244e-01, PNorm = 56.3238, GNorm = 1.1711, lr_0 = 8.6650e-04
Loss = 2.0544e-01, PNorm = 56.3556, GNorm = 0.8694, lr_0 = 8.6590e-04
Loss = 2.2841e-01, PNorm = 56.3886, GNorm = 2.1207, lr_0 = 8.6531e-04
Loss = 2.0969e-01, PNorm = 56.4182, GNorm = 0.7165, lr_0 = 8.6472e-04
Loss = 1.6814e-01, PNorm = 56.4482, GNorm = 0.9831, lr_0 = 8.6413e-04
Loss = 2.0616e-01, PNorm = 56.4765, GNorm = 0.9894, lr_0 = 8.6353e-04
Loss = 2.0307e-01, PNorm = 56.5096, GNorm = 2.5783, lr_0 = 8.6294e-04
Loss = 1.8247e-01, PNorm = 56.5449, GNorm = 0.9117, lr_0 = 8.6235e-04
Loss = 1.9735e-01, PNorm = 56.5628, GNorm = 1.2535, lr_0 = 8.6176e-04
Loss = 1.9863e-01, PNorm = 56.5914, GNorm = 1.2641, lr_0 = 8.6117e-04
Loss = 1.7849e-01, PNorm = 56.6211, GNorm = 0.8306, lr_0 = 8.6058e-04
Loss = 1.8193e-01, PNorm = 56.6422, GNorm = 0.6688, lr_0 = 8.5999e-04
Loss = 1.8812e-01, PNorm = 56.6639, GNorm = 0.8701, lr_0 = 8.5940e-04
Loss = 1.9932e-01, PNorm = 56.6827, GNorm = 0.9711, lr_0 = 8.5881e-04
Loss = 2.1367e-01, PNorm = 56.7001, GNorm = 0.7881, lr_0 = 8.5823e-04
Loss = 2.0910e-01, PNorm = 56.7169, GNorm = 1.5115, lr_0 = 8.5764e-04
Loss = 2.0284e-01, PNorm = 56.7356, GNorm = 0.8809, lr_0 = 8.5705e-04
Loss = 1.8567e-01, PNorm = 56.7570, GNorm = 0.7324, lr_0 = 8.5646e-04
Loss = 1.6956e-01, PNorm = 56.7780, GNorm = 1.6608, lr_0 = 8.5588e-04
Loss = 1.8626e-01, PNorm = 56.7920, GNorm = 0.7934, lr_0 = 8.5529e-04
Loss = 2.4655e-01, PNorm = 56.8129, GNorm = 0.7848, lr_0 = 8.5470e-04
Loss = 2.0122e-01, PNorm = 56.8324, GNorm = 0.6797, lr_0 = 8.5412e-04
Loss = 1.8891e-01, PNorm = 56.8605, GNorm = 1.0827, lr_0 = 8.5353e-04
Loss = 1.8952e-01, PNorm = 56.8791, GNorm = 1.3915, lr_0 = 8.5295e-04
Loss = 2.1533e-01, PNorm = 56.9086, GNorm = 0.8422, lr_0 = 8.5236e-04
Loss = 2.0696e-01, PNorm = 56.9302, GNorm = 1.7435, lr_0 = 8.5178e-04
Loss = 2.0783e-01, PNorm = 56.9493, GNorm = 1.0941, lr_0 = 8.5120e-04
Loss = 1.9348e-01, PNorm = 56.9693, GNorm = 1.5352, lr_0 = 8.5061e-04
Loss = 1.6719e-01, PNorm = 56.9853, GNorm = 0.9415, lr_0 = 8.5003e-04
Loss = 1.8887e-01, PNorm = 57.0078, GNorm = 1.3794, lr_0 = 8.4945e-04
Loss = 1.9785e-01, PNorm = 57.0338, GNorm = 0.7125, lr_0 = 8.4887e-04
Loss = 1.8846e-01, PNorm = 57.0564, GNorm = 2.6585, lr_0 = 8.4828e-04
Validation mae = 0.264123
Epoch 4
Loss = 2.0278e-01, PNorm = 57.0789, GNorm = 1.1084, lr_0 = 8.4770e-04
Loss = 2.0534e-01, PNorm = 57.1031, GNorm = 0.8518, lr_0 = 8.4712e-04
Loss = 1.8130e-01, PNorm = 57.1252, GNorm = 0.9369, lr_0 = 8.4654e-04
Loss = 1.7201e-01, PNorm = 57.1476, GNorm = 1.3960, lr_0 = 8.4596e-04
Loss = 1.8615e-01, PNorm = 57.1664, GNorm = 0.7471, lr_0 = 8.4538e-04
Loss = 1.7688e-01, PNorm = 57.1862, GNorm = 1.7950, lr_0 = 8.4480e-04
Loss = 1.9155e-01, PNorm = 57.2086, GNorm = 0.6028, lr_0 = 8.4423e-04
Loss = 1.9538e-01, PNorm = 57.2443, GNorm = 1.3898, lr_0 = 8.4365e-04
Loss = 1.8125e-01, PNorm = 57.2593, GNorm = 0.6486, lr_0 = 8.4307e-04
Loss = 1.8165e-01, PNorm = 57.2834, GNorm = 1.4083, lr_0 = 8.4249e-04
Loss = 2.0719e-01, PNorm = 57.3071, GNorm = 2.2228, lr_0 = 8.4191e-04
Loss = 1.7552e-01, PNorm = 57.3349, GNorm = 0.7571, lr_0 = 8.4134e-04
Loss = 1.7462e-01, PNorm = 57.3608, GNorm = 2.0448, lr_0 = 8.4076e-04
Loss = 1.6683e-01, PNorm = 57.3848, GNorm = 1.4327, lr_0 = 8.4019e-04
Loss = 1.9010e-01, PNorm = 57.4017, GNorm = 0.6461, lr_0 = 8.3961e-04
Loss = 1.7345e-01, PNorm = 57.4265, GNorm = 0.9441, lr_0 = 8.3903e-04
Loss = 1.7259e-01, PNorm = 57.4493, GNorm = 1.0602, lr_0 = 8.3846e-04
Loss = 1.9809e-01, PNorm = 57.4810, GNorm = 0.9240, lr_0 = 8.3789e-04
Loss = 1.7415e-01, PNorm = 57.5006, GNorm = 1.7268, lr_0 = 8.3731e-04
Loss = 1.8014e-01, PNorm = 57.5239, GNorm = 2.0794, lr_0 = 8.3674e-04
Loss = 1.9165e-01, PNorm = 57.5499, GNorm = 1.0875, lr_0 = 8.3616e-04
Loss = 1.6956e-01, PNorm = 57.5847, GNorm = 2.3946, lr_0 = 8.3559e-04
Loss = 2.0371e-01, PNorm = 57.6159, GNorm = 0.9924, lr_0 = 8.3502e-04
Loss = 2.1057e-01, PNorm = 57.6466, GNorm = 2.5859, lr_0 = 8.3445e-04
Loss = 1.8801e-01, PNorm = 57.6688, GNorm = 0.8437, lr_0 = 8.3388e-04
Loss = 1.7597e-01, PNorm = 57.7022, GNorm = 1.2287, lr_0 = 8.3330e-04
Loss = 2.0893e-01, PNorm = 57.7367, GNorm = 0.8494, lr_0 = 8.3273e-04
Loss = 1.9176e-01, PNorm = 57.7668, GNorm = 1.8375, lr_0 = 8.3216e-04
Loss = 1.7271e-01, PNorm = 57.7868, GNorm = 2.2843, lr_0 = 8.3159e-04
Loss = 2.0360e-01, PNorm = 57.8110, GNorm = 1.4543, lr_0 = 8.3102e-04
Loss = 1.9450e-01, PNorm = 57.8389, GNorm = 0.6763, lr_0 = 8.3045e-04
Loss = 2.2455e-01, PNorm = 57.8657, GNorm = 0.7860, lr_0 = 8.2988e-04
Loss = 2.0998e-01, PNorm = 57.9028, GNorm = 1.4761, lr_0 = 8.2932e-04
Loss = 2.1005e-01, PNorm = 57.9296, GNorm = 1.4297, lr_0 = 8.2875e-04
Loss = 1.9333e-01, PNorm = 57.9502, GNorm = 0.6745, lr_0 = 8.2818e-04
Loss = 2.0291e-01, PNorm = 57.9822, GNorm = 1.2529, lr_0 = 8.2761e-04
Loss = 1.9960e-01, PNorm = 58.0022, GNorm = 0.6684, lr_0 = 8.2705e-04
Loss = 1.8069e-01, PNorm = 58.0295, GNorm = 1.2498, lr_0 = 8.2648e-04
Loss = 1.9477e-01, PNorm = 58.0471, GNorm = 1.2135, lr_0 = 8.2591e-04
Loss = 1.8826e-01, PNorm = 58.0749, GNorm = 1.9947, lr_0 = 8.2535e-04
Loss = 2.0517e-01, PNorm = 58.1081, GNorm = 0.7606, lr_0 = 8.2478e-04
Loss = 2.0574e-01, PNorm = 58.1412, GNorm = 0.8748, lr_0 = 8.2422e-04
Loss = 1.6490e-01, PNorm = 58.1663, GNorm = 0.8184, lr_0 = 8.2365e-04
Loss = 1.7649e-01, PNorm = 58.1912, GNorm = 1.2995, lr_0 = 8.2309e-04
Loss = 1.7733e-01, PNorm = 58.2106, GNorm = 0.9305, lr_0 = 8.2252e-04
Loss = 1.8265e-01, PNorm = 58.2362, GNorm = 0.8546, lr_0 = 8.2196e-04
Loss = 1.7093e-01, PNorm = 58.2649, GNorm = 1.0894, lr_0 = 8.2140e-04
Loss = 2.0269e-01, PNorm = 58.2922, GNorm = 2.3748, lr_0 = 8.2084e-04
Loss = 2.0379e-01, PNorm = 58.3256, GNorm = 1.0220, lr_0 = 8.2027e-04
Loss = 2.0215e-01, PNorm = 58.3512, GNorm = 0.7841, lr_0 = 8.1971e-04
Loss = 1.9349e-01, PNorm = 58.3652, GNorm = 1.8421, lr_0 = 8.1915e-04
Loss = 2.1272e-01, PNorm = 58.3899, GNorm = 2.7310, lr_0 = 8.1859e-04
Loss = 1.9211e-01, PNorm = 58.4185, GNorm = 1.3643, lr_0 = 8.1803e-04
Loss = 1.7785e-01, PNorm = 58.4424, GNorm = 1.1752, lr_0 = 8.1747e-04
Loss = 1.7890e-01, PNorm = 58.4641, GNorm = 0.9831, lr_0 = 8.1691e-04
Loss = 1.9518e-01, PNorm = 58.4884, GNorm = 1.2318, lr_0 = 8.1635e-04
Loss = 2.0028e-01, PNorm = 58.5156, GNorm = 2.1344, lr_0 = 8.1579e-04
Loss = 2.0241e-01, PNorm = 58.5414, GNorm = 1.0956, lr_0 = 8.1523e-04
Loss = 1.9373e-01, PNorm = 58.5722, GNorm = 1.3955, lr_0 = 8.1467e-04
Loss = 2.1168e-01, PNorm = 58.6046, GNorm = 0.5992, lr_0 = 8.1411e-04
Loss = 1.9080e-01, PNorm = 58.6321, GNorm = 1.2404, lr_0 = 8.1355e-04
Loss = 1.8814e-01, PNorm = 58.6581, GNorm = 0.9975, lr_0 = 8.1300e-04
Loss = 1.6970e-01, PNorm = 58.6838, GNorm = 0.6180, lr_0 = 8.1244e-04
Loss = 1.7811e-01, PNorm = 58.7071, GNorm = 0.6133, lr_0 = 8.1188e-04
Loss = 1.9347e-01, PNorm = 58.7226, GNorm = 1.2913, lr_0 = 8.1133e-04
Loss = 1.8565e-01, PNorm = 58.7475, GNorm = 1.0194, lr_0 = 8.1077e-04
Loss = 1.7989e-01, PNorm = 58.7714, GNorm = 0.7016, lr_0 = 8.1022e-04
Loss = 1.8835e-01, PNorm = 58.8036, GNorm = 1.5493, lr_0 = 8.0966e-04
Loss = 2.1365e-01, PNorm = 58.8218, GNorm = 1.6463, lr_0 = 8.0911e-04
Loss = 1.8565e-01, PNorm = 58.8467, GNorm = 0.6525, lr_0 = 8.0855e-04
Loss = 1.7596e-01, PNorm = 58.8670, GNorm = 0.6495, lr_0 = 8.0800e-04
Loss = 1.8332e-01, PNorm = 58.8867, GNorm = 1.0834, lr_0 = 8.0745e-04
Loss = 1.7896e-01, PNorm = 58.8981, GNorm = 0.5331, lr_0 = 8.0689e-04
Loss = 2.0687e-01, PNorm = 58.9271, GNorm = 1.2372, lr_0 = 8.0634e-04
Loss = 2.1344e-01, PNorm = 58.9617, GNorm = 0.7182, lr_0 = 8.0579e-04
Loss = 2.0377e-01, PNorm = 58.9854, GNorm = 1.0064, lr_0 = 8.0523e-04
Loss = 1.8452e-01, PNorm = 59.0129, GNorm = 0.8287, lr_0 = 8.0468e-04
Loss = 2.0413e-01, PNorm = 59.0255, GNorm = 1.7018, lr_0 = 8.0413e-04
Loss = 1.8529e-01, PNorm = 59.0489, GNorm = 1.0419, lr_0 = 8.0358e-04
Loss = 1.8072e-01, PNorm = 59.0665, GNorm = 0.5113, lr_0 = 8.0303e-04
Loss = 1.9437e-01, PNorm = 59.0991, GNorm = 0.7664, lr_0 = 8.0248e-04
Loss = 1.9985e-01, PNorm = 59.1228, GNorm = 0.8162, lr_0 = 8.0193e-04
Loss = 1.8470e-01, PNorm = 59.1413, GNorm = 0.6497, lr_0 = 8.0138e-04
Loss = 1.8838e-01, PNorm = 59.1630, GNorm = 0.6098, lr_0 = 8.0083e-04
Loss = 1.8357e-01, PNorm = 59.1811, GNorm = 1.7217, lr_0 = 8.0028e-04
Loss = 1.9683e-01, PNorm = 59.2083, GNorm = 1.0502, lr_0 = 7.9974e-04
Loss = 1.9437e-01, PNorm = 59.2310, GNorm = 0.6125, lr_0 = 7.9919e-04
Loss = 1.8831e-01, PNorm = 59.2617, GNorm = 0.7202, lr_0 = 7.9864e-04
Loss = 1.7773e-01, PNorm = 59.2799, GNorm = 0.6692, lr_0 = 7.9809e-04
Loss = 1.9224e-01, PNorm = 59.3020, GNorm = 0.7774, lr_0 = 7.9755e-04
Loss = 1.8505e-01, PNorm = 59.3181, GNorm = 2.3389, lr_0 = 7.9700e-04
Loss = 1.7869e-01, PNorm = 59.3412, GNorm = 1.0281, lr_0 = 7.9645e-04
Loss = 1.7829e-01, PNorm = 59.3640, GNorm = 1.5351, lr_0 = 7.9591e-04
Loss = 1.7186e-01, PNorm = 59.3854, GNorm = 0.7644, lr_0 = 7.9536e-04
Loss = 1.8647e-01, PNorm = 59.4105, GNorm = 2.5892, lr_0 = 7.9482e-04
Loss = 1.8368e-01, PNorm = 59.4368, GNorm = 1.6588, lr_0 = 7.9427e-04
Loss = 1.9598e-01, PNorm = 59.4614, GNorm = 1.4248, lr_0 = 7.9373e-04
Loss = 1.8367e-01, PNorm = 59.4934, GNorm = 0.8134, lr_0 = 7.9319e-04
Loss = 1.7301e-01, PNorm = 59.5103, GNorm = 1.4486, lr_0 = 7.9264e-04
Loss = 2.0152e-01, PNorm = 59.5276, GNorm = 0.7349, lr_0 = 7.9210e-04
Loss = 2.1194e-01, PNorm = 59.5546, GNorm = 0.9584, lr_0 = 7.9156e-04
Loss = 2.1075e-01, PNorm = 59.5729, GNorm = 0.9906, lr_0 = 7.9101e-04
Loss = 1.7893e-01, PNorm = 59.5946, GNorm = 1.0560, lr_0 = 7.9047e-04
Loss = 1.8618e-01, PNorm = 59.6235, GNorm = 1.1217, lr_0 = 7.8993e-04
Loss = 1.8847e-01, PNorm = 59.6489, GNorm = 2.7156, lr_0 = 7.8939e-04
Loss = 1.7418e-01, PNorm = 59.6809, GNorm = 1.2664, lr_0 = 7.8885e-04
Loss = 1.9570e-01, PNorm = 59.7084, GNorm = 1.0297, lr_0 = 7.8831e-04
Loss = 1.9719e-01, PNorm = 59.7395, GNorm = 1.5888, lr_0 = 7.8777e-04
Loss = 1.7127e-01, PNorm = 59.7572, GNorm = 1.6502, lr_0 = 7.8723e-04
Loss = 1.9873e-01, PNorm = 59.7796, GNorm = 0.8252, lr_0 = 7.8669e-04
Loss = 1.8600e-01, PNorm = 59.8045, GNorm = 0.6822, lr_0 = 7.8615e-04
Loss = 1.8414e-01, PNorm = 59.8304, GNorm = 0.9580, lr_0 = 7.8561e-04
Loss = 2.0986e-01, PNorm = 59.8449, GNorm = 1.6057, lr_0 = 7.8507e-04
Loss = 1.8445e-01, PNorm = 59.8715, GNorm = 2.0919, lr_0 = 7.8454e-04
Loss = 1.7520e-01, PNorm = 59.8930, GNorm = 0.8734, lr_0 = 7.8400e-04
Loss = 2.0142e-01, PNorm = 59.9188, GNorm = 1.8002, lr_0 = 7.8346e-04
Loss = 1.9655e-01, PNorm = 59.9474, GNorm = 0.6188, lr_0 = 7.8293e-04
Loss = 1.9439e-01, PNorm = 59.9775, GNorm = 0.9485, lr_0 = 7.8239e-04
Loss = 1.8069e-01, PNorm = 59.9988, GNorm = 2.4042, lr_0 = 7.8185e-04
Loss = 1.8736e-01, PNorm = 60.0237, GNorm = 0.7099, lr_0 = 7.8132e-04
Validation mae = 0.258366
Epoch 5
Loss = 1.7605e-01, PNorm = 60.0420, GNorm = 1.0094, lr_0 = 7.8078e-04
Loss = 1.8070e-01, PNorm = 60.0666, GNorm = 0.8585, lr_0 = 7.8025e-04
Loss = 1.6683e-01, PNorm = 60.0823, GNorm = 0.7270, lr_0 = 7.7971e-04
Loss = 1.8231e-01, PNorm = 60.1034, GNorm = 0.8850, lr_0 = 7.7918e-04
Loss = 1.6850e-01, PNorm = 60.1229, GNorm = 0.6545, lr_0 = 7.7864e-04
Loss = 1.9314e-01, PNorm = 60.1588, GNorm = 1.3580, lr_0 = 7.7811e-04
Loss = 1.6555e-01, PNorm = 60.1756, GNorm = 0.7546, lr_0 = 7.7758e-04
Loss = 1.9364e-01, PNorm = 60.1936, GNorm = 0.6967, lr_0 = 7.7705e-04
Loss = 1.7421e-01, PNorm = 60.2160, GNorm = 1.1623, lr_0 = 7.7651e-04
Loss = 1.6060e-01, PNorm = 60.2467, GNorm = 0.8373, lr_0 = 7.7598e-04
Loss = 1.7337e-01, PNorm = 60.2725, GNorm = 1.0318, lr_0 = 7.7545e-04
Loss = 1.7272e-01, PNorm = 60.2973, GNorm = 0.7856, lr_0 = 7.7492e-04
Loss = 1.6810e-01, PNorm = 60.3165, GNorm = 1.5223, lr_0 = 7.7439e-04
Loss = 1.6651e-01, PNorm = 60.3388, GNorm = 1.1137, lr_0 = 7.7386e-04
Loss = 1.5154e-01, PNorm = 60.3611, GNorm = 0.7814, lr_0 = 7.7333e-04
Loss = 1.6439e-01, PNorm = 60.3831, GNorm = 0.7488, lr_0 = 7.7280e-04
Loss = 1.8204e-01, PNorm = 60.4109, GNorm = 0.6400, lr_0 = 7.7227e-04
Loss = 1.6581e-01, PNorm = 60.4284, GNorm = 1.0328, lr_0 = 7.7174e-04
Loss = 1.8315e-01, PNorm = 60.4501, GNorm = 1.5956, lr_0 = 7.7121e-04
Loss = 1.6708e-01, PNorm = 60.4678, GNorm = 0.7215, lr_0 = 7.7068e-04
Loss = 1.6207e-01, PNorm = 60.4912, GNorm = 0.6314, lr_0 = 7.7015e-04
Loss = 1.7038e-01, PNorm = 60.5062, GNorm = 1.1463, lr_0 = 7.6963e-04
Loss = 1.5708e-01, PNorm = 60.5313, GNorm = 0.5989, lr_0 = 7.6910e-04
Loss = 1.9594e-01, PNorm = 60.5561, GNorm = 1.4855, lr_0 = 7.6857e-04
Loss = 1.7688e-01, PNorm = 60.5853, GNorm = 1.5387, lr_0 = 7.6805e-04
Loss = 1.7396e-01, PNorm = 60.6123, GNorm = 1.0400, lr_0 = 7.6752e-04
Loss = 1.6603e-01, PNorm = 60.6387, GNorm = 1.6442, lr_0 = 7.6699e-04
Loss = 1.7578e-01, PNorm = 60.6726, GNorm = 1.4602, lr_0 = 7.6647e-04
Loss = 1.8958e-01, PNorm = 60.7027, GNorm = 1.1644, lr_0 = 7.6594e-04
Loss = 1.9470e-01, PNorm = 60.7402, GNorm = 0.8712, lr_0 = 7.6542e-04
Loss = 1.7676e-01, PNorm = 60.7580, GNorm = 1.1417, lr_0 = 7.6489e-04
Loss = 1.7905e-01, PNorm = 60.7731, GNorm = 1.9503, lr_0 = 7.6437e-04
Loss = 1.6934e-01, PNorm = 60.7976, GNorm = 0.8447, lr_0 = 7.6385e-04
Loss = 1.7621e-01, PNorm = 60.8185, GNorm = 1.4735, lr_0 = 7.6332e-04
Loss = 1.7359e-01, PNorm = 60.8497, GNorm = 1.2564, lr_0 = 7.6280e-04
Loss = 1.7768e-01, PNorm = 60.8767, GNorm = 0.8990, lr_0 = 7.6228e-04
Loss = 1.9008e-01, PNorm = 60.9073, GNorm = 1.2445, lr_0 = 7.6176e-04
Loss = 1.8731e-01, PNorm = 60.9376, GNorm = 0.7221, lr_0 = 7.6123e-04
Loss = 1.8872e-01, PNorm = 60.9626, GNorm = 1.6485, lr_0 = 7.6071e-04
Loss = 1.5831e-01, PNorm = 60.9796, GNorm = 0.5127, lr_0 = 7.6019e-04
Loss = 1.5719e-01, PNorm = 61.0001, GNorm = 1.2469, lr_0 = 7.5967e-04
Loss = 1.6435e-01, PNorm = 61.0192, GNorm = 1.4667, lr_0 = 7.5915e-04
Loss = 1.7347e-01, PNorm = 61.0389, GNorm = 0.5901, lr_0 = 7.5863e-04
Loss = 1.5118e-01, PNorm = 61.0672, GNorm = 1.0737, lr_0 = 7.5811e-04
Loss = 1.7561e-01, PNorm = 61.0881, GNorm = 1.0629, lr_0 = 7.5759e-04
Loss = 1.8093e-01, PNorm = 61.1144, GNorm = 0.7432, lr_0 = 7.5707e-04
Loss = 1.7578e-01, PNorm = 61.1404, GNorm = 1.1053, lr_0 = 7.5655e-04
Loss = 1.8657e-01, PNorm = 61.1609, GNorm = 1.0523, lr_0 = 7.5603e-04
Loss = 1.6985e-01, PNorm = 61.1794, GNorm = 0.6292, lr_0 = 7.5552e-04
Loss = 1.6297e-01, PNorm = 61.1962, GNorm = 0.8031, lr_0 = 7.5500e-04
Loss = 1.9236e-01, PNorm = 61.2259, GNorm = 1.4039, lr_0 = 7.5448e-04
Loss = 1.6366e-01, PNorm = 61.2573, GNorm = 1.1824, lr_0 = 7.5397e-04
Loss = 1.5787e-01, PNorm = 61.2786, GNorm = 0.9542, lr_0 = 7.5345e-04
Loss = 1.5733e-01, PNorm = 61.2988, GNorm = 1.4374, lr_0 = 7.5293e-04
Loss = 1.7217e-01, PNorm = 61.3225, GNorm = 0.6834, lr_0 = 7.5242e-04
Loss = 1.5392e-01, PNorm = 61.3468, GNorm = 0.5392, lr_0 = 7.5190e-04
Loss = 2.0370e-01, PNorm = 61.3704, GNorm = 1.3063, lr_0 = 7.5139e-04
Loss = 1.7223e-01, PNorm = 61.3989, GNorm = 1.0337, lr_0 = 7.5087e-04
Loss = 2.1532e-01, PNorm = 61.4314, GNorm = 0.8201, lr_0 = 7.5036e-04
Loss = 1.8930e-01, PNorm = 61.4580, GNorm = 1.9552, lr_0 = 7.4984e-04
Loss = 1.8664e-01, PNorm = 61.4833, GNorm = 0.7817, lr_0 = 7.4933e-04
Loss = 1.8490e-01, PNorm = 61.5124, GNorm = 0.9254, lr_0 = 7.4882e-04
Loss = 1.8739e-01, PNorm = 61.5317, GNorm = 0.9837, lr_0 = 7.4830e-04
Loss = 2.0139e-01, PNorm = 61.5510, GNorm = 0.9506, lr_0 = 7.4779e-04
Loss = 1.7195e-01, PNorm = 61.5687, GNorm = 1.5232, lr_0 = 7.4728e-04
Loss = 1.8610e-01, PNorm = 61.5909, GNorm = 0.9277, lr_0 = 7.4677e-04
Loss = 1.6561e-01, PNorm = 61.6097, GNorm = 1.0611, lr_0 = 7.4625e-04
Loss = 1.6856e-01, PNorm = 61.6317, GNorm = 0.9401, lr_0 = 7.4574e-04
Loss = 1.9183e-01, PNorm = 61.6490, GNorm = 1.9761, lr_0 = 7.4523e-04
Loss = 1.6853e-01, PNorm = 61.6764, GNorm = 0.7537, lr_0 = 7.4472e-04
Loss = 1.7740e-01, PNorm = 61.7022, GNorm = 0.7366, lr_0 = 7.4421e-04
Loss = 1.6328e-01, PNorm = 61.7192, GNorm = 0.6558, lr_0 = 7.4370e-04
Loss = 1.7639e-01, PNorm = 61.7356, GNorm = 0.6704, lr_0 = 7.4319e-04
Loss = 1.6535e-01, PNorm = 61.7596, GNorm = 1.3575, lr_0 = 7.4268e-04
Loss = 1.7263e-01, PNorm = 61.7808, GNorm = 1.5187, lr_0 = 7.4217e-04
Loss = 1.7683e-01, PNorm = 61.8056, GNorm = 1.4070, lr_0 = 7.4167e-04
Loss = 1.6300e-01, PNorm = 61.8282, GNorm = 1.1434, lr_0 = 7.4116e-04
Loss = 1.7205e-01, PNorm = 61.8523, GNorm = 0.8069, lr_0 = 7.4065e-04
Loss = 1.8179e-01, PNorm = 61.8706, GNorm = 1.1270, lr_0 = 7.4014e-04
Loss = 1.9938e-01, PNorm = 61.8880, GNorm = 0.7364, lr_0 = 7.3964e-04
Loss = 1.7823e-01, PNorm = 61.9093, GNorm = 1.9249, lr_0 = 7.3913e-04
Loss = 1.7197e-01, PNorm = 61.9213, GNorm = 0.7037, lr_0 = 7.3862e-04
Loss = 1.7730e-01, PNorm = 61.9454, GNorm = 0.9923, lr_0 = 7.3812e-04
Loss = 1.8792e-01, PNorm = 61.9694, GNorm = 1.6093, lr_0 = 7.3761e-04
Loss = 1.8223e-01, PNorm = 61.9914, GNorm = 0.9844, lr_0 = 7.3711e-04
Loss = 1.8907e-01, PNorm = 62.0240, GNorm = 1.4876, lr_0 = 7.3660e-04
Loss = 1.9987e-01, PNorm = 62.0521, GNorm = 0.8801, lr_0 = 7.3610e-04
Loss = 1.6852e-01, PNorm = 62.0781, GNorm = 1.0274, lr_0 = 7.3559e-04
Loss = 1.7664e-01, PNorm = 62.0924, GNorm = 1.3254, lr_0 = 7.3509e-04
Loss = 1.7588e-01, PNorm = 62.1080, GNorm = 1.1329, lr_0 = 7.3458e-04
Loss = 1.8867e-01, PNorm = 62.1372, GNorm = 1.5140, lr_0 = 7.3408e-04
Loss = 1.8549e-01, PNorm = 62.1644, GNorm = 0.7198, lr_0 = 7.3358e-04
Loss = 1.7500e-01, PNorm = 62.1953, GNorm = 0.6826, lr_0 = 7.3308e-04
Loss = 1.9601e-01, PNorm = 62.2219, GNorm = 1.3794, lr_0 = 7.3257e-04
Loss = 1.8370e-01, PNorm = 62.2486, GNorm = 0.7458, lr_0 = 7.3207e-04
Loss = 1.5311e-01, PNorm = 62.2826, GNorm = 1.0998, lr_0 = 7.3157e-04
Loss = 1.7543e-01, PNorm = 62.3077, GNorm = 0.7015, lr_0 = 7.3107e-04
Loss = 1.5733e-01, PNorm = 62.3371, GNorm = 0.6525, lr_0 = 7.3057e-04
Loss = 1.9267e-01, PNorm = 62.3578, GNorm = 1.4773, lr_0 = 7.3007e-04
Loss = 1.8659e-01, PNorm = 62.3779, GNorm = 1.1213, lr_0 = 7.2957e-04
Loss = 1.5092e-01, PNorm = 62.4005, GNorm = 1.4536, lr_0 = 7.2907e-04
Loss = 1.8462e-01, PNorm = 62.4345, GNorm = 0.7142, lr_0 = 7.2857e-04
Loss = 1.7517e-01, PNorm = 62.4629, GNorm = 1.4996, lr_0 = 7.2807e-04
Loss = 1.7512e-01, PNorm = 62.4846, GNorm = 0.7698, lr_0 = 7.2757e-04
Loss = 1.7609e-01, PNorm = 62.5076, GNorm = 0.7891, lr_0 = 7.2707e-04
Loss = 1.7253e-01, PNorm = 62.5289, GNorm = 0.9136, lr_0 = 7.2657e-04
Loss = 2.0070e-01, PNorm = 62.5423, GNorm = 1.1019, lr_0 = 7.2608e-04
Loss = 2.0763e-01, PNorm = 62.5630, GNorm = 0.7028, lr_0 = 7.2558e-04
Loss = 1.8299e-01, PNorm = 62.5883, GNorm = 1.0881, lr_0 = 7.2508e-04
Loss = 1.8733e-01, PNorm = 62.6122, GNorm = 1.0413, lr_0 = 7.2458e-04
Loss = 1.8617e-01, PNorm = 62.6379, GNorm = 0.9876, lr_0 = 7.2409e-04
Loss = 1.6962e-01, PNorm = 62.6599, GNorm = 0.5267, lr_0 = 7.2359e-04
Loss = 1.7467e-01, PNorm = 62.6817, GNorm = 0.7223, lr_0 = 7.2310e-04
Loss = 1.7362e-01, PNorm = 62.7016, GNorm = 0.7917, lr_0 = 7.2260e-04
Loss = 1.7635e-01, PNorm = 62.7172, GNorm = 1.0521, lr_0 = 7.2211e-04
Loss = 1.8392e-01, PNorm = 62.7426, GNorm = 1.0236, lr_0 = 7.2161e-04
Loss = 1.7586e-01, PNorm = 62.7719, GNorm = 0.8662, lr_0 = 7.2112e-04
Loss = 1.6339e-01, PNorm = 62.7963, GNorm = 0.9947, lr_0 = 7.2062e-04
Loss = 1.8071e-01, PNorm = 62.8235, GNorm = 1.3815, lr_0 = 7.2013e-04
Loss = 1.7320e-01, PNorm = 62.8410, GNorm = 1.2611, lr_0 = 7.1964e-04
Validation mae = 0.252789
Epoch 6
Loss = 1.7121e-01, PNorm = 62.8563, GNorm = 0.6623, lr_0 = 7.1914e-04
Loss = 1.8884e-01, PNorm = 62.8752, GNorm = 1.2544, lr_0 = 7.1865e-04
Loss = 1.5847e-01, PNorm = 62.8926, GNorm = 0.9731, lr_0 = 7.1816e-04
Loss = 1.7181e-01, PNorm = 62.9143, GNorm = 1.1464, lr_0 = 7.1767e-04
Loss = 1.5717e-01, PNorm = 62.9350, GNorm = 1.8859, lr_0 = 7.1717e-04
Loss = 1.6102e-01, PNorm = 62.9541, GNorm = 0.7100, lr_0 = 7.1668e-04
Loss = 1.8442e-01, PNorm = 62.9784, GNorm = 0.7293, lr_0 = 7.1619e-04
Loss = 1.4941e-01, PNorm = 63.0011, GNorm = 0.7345, lr_0 = 7.1570e-04
Loss = 1.3455e-01, PNorm = 63.0192, GNorm = 0.9481, lr_0 = 7.1521e-04
Loss = 1.6265e-01, PNorm = 63.0360, GNorm = 1.6521, lr_0 = 7.1472e-04
Loss = 1.3444e-01, PNorm = 63.0546, GNorm = 0.6427, lr_0 = 7.1423e-04
Loss = 1.4316e-01, PNorm = 63.0749, GNorm = 1.0896, lr_0 = 7.1374e-04
Loss = 1.4996e-01, PNorm = 63.0939, GNorm = 0.8454, lr_0 = 7.1325e-04
Loss = 1.6360e-01, PNorm = 63.1161, GNorm = 0.9961, lr_0 = 7.1277e-04
Loss = 1.5829e-01, PNorm = 63.1399, GNorm = 0.8291, lr_0 = 7.1228e-04
Loss = 1.7049e-01, PNorm = 63.1635, GNorm = 1.2885, lr_0 = 7.1179e-04
Loss = 1.5225e-01, PNorm = 63.1843, GNorm = 0.6455, lr_0 = 7.1130e-04
Loss = 1.4297e-01, PNorm = 63.1956, GNorm = 0.8059, lr_0 = 7.1081e-04
Loss = 1.8575e-01, PNorm = 63.2119, GNorm = 0.5888, lr_0 = 7.1033e-04
Loss = 1.8991e-01, PNorm = 63.2275, GNorm = 0.5676, lr_0 = 7.0984e-04
Loss = 1.8265e-01, PNorm = 63.2543, GNorm = 0.6895, lr_0 = 7.0935e-04
Loss = 1.5810e-01, PNorm = 63.2789, GNorm = 0.6145, lr_0 = 7.0887e-04
Loss = 1.7346e-01, PNorm = 63.3070, GNorm = 0.6142, lr_0 = 7.0838e-04
Loss = 1.5622e-01, PNorm = 63.3369, GNorm = 1.1591, lr_0 = 7.0790e-04
Loss = 1.7069e-01, PNorm = 63.3611, GNorm = 1.8509, lr_0 = 7.0741e-04
Loss = 1.7490e-01, PNorm = 63.3860, GNorm = 1.5061, lr_0 = 7.0693e-04
Loss = 1.6669e-01, PNorm = 63.4080, GNorm = 0.9111, lr_0 = 7.0644e-04
Loss = 1.5654e-01, PNorm = 63.4284, GNorm = 0.7920, lr_0 = 7.0596e-04
Loss = 1.7262e-01, PNorm = 63.4493, GNorm = 0.8052, lr_0 = 7.0548e-04
Loss = 1.6649e-01, PNorm = 63.4765, GNorm = 0.7631, lr_0 = 7.0499e-04
Loss = 1.7074e-01, PNorm = 63.5060, GNorm = 0.7036, lr_0 = 7.0451e-04
Loss = 1.7461e-01, PNorm = 63.5325, GNorm = 1.8896, lr_0 = 7.0403e-04
Loss = 1.6638e-01, PNorm = 63.5517, GNorm = 1.0156, lr_0 = 7.0354e-04
Loss = 1.7477e-01, PNorm = 63.5672, GNorm = 0.9309, lr_0 = 7.0306e-04
Loss = 1.5240e-01, PNorm = 63.5853, GNorm = 1.7753, lr_0 = 7.0258e-04
Loss = 1.8742e-01, PNorm = 63.6056, GNorm = 2.3581, lr_0 = 7.0210e-04
Loss = 1.6813e-01, PNorm = 63.6344, GNorm = 0.9646, lr_0 = 7.0162e-04
Loss = 1.7624e-01, PNorm = 63.6664, GNorm = 1.5896, lr_0 = 7.0114e-04
Loss = 1.6776e-01, PNorm = 63.6968, GNorm = 0.4476, lr_0 = 7.0066e-04
Loss = 1.4943e-01, PNorm = 63.7190, GNorm = 1.0029, lr_0 = 7.0018e-04
Loss = 1.7616e-01, PNorm = 63.7388, GNorm = 0.8539, lr_0 = 6.9970e-04
Loss = 1.5689e-01, PNorm = 63.7582, GNorm = 0.8786, lr_0 = 6.9922e-04
Loss = 1.6129e-01, PNorm = 63.7825, GNorm = 0.6747, lr_0 = 6.9874e-04
Loss = 1.6593e-01, PNorm = 63.7962, GNorm = 1.1177, lr_0 = 6.9826e-04
Loss = 1.6186e-01, PNorm = 63.8162, GNorm = 0.8874, lr_0 = 6.9778e-04
Loss = 1.4646e-01, PNorm = 63.8415, GNorm = 1.0334, lr_0 = 6.9730e-04
Loss = 1.4832e-01, PNorm = 63.8623, GNorm = 0.9634, lr_0 = 6.9683e-04
Loss = 1.6957e-01, PNorm = 63.8808, GNorm = 1.3865, lr_0 = 6.9635e-04
Loss = 1.5084e-01, PNorm = 63.9024, GNorm = 0.9814, lr_0 = 6.9587e-04
Loss = 1.7658e-01, PNorm = 63.9245, GNorm = 1.0050, lr_0 = 6.9540e-04
Loss = 1.4095e-01, PNorm = 63.9481, GNorm = 1.0737, lr_0 = 6.9492e-04
Loss = 1.7352e-01, PNorm = 63.9674, GNorm = 0.9748, lr_0 = 6.9444e-04
Loss = 1.8261e-01, PNorm = 63.9906, GNorm = 2.4021, lr_0 = 6.9397e-04
Loss = 1.9195e-01, PNorm = 64.0172, GNorm = 0.9491, lr_0 = 6.9349e-04
Loss = 1.5748e-01, PNorm = 64.0420, GNorm = 1.0210, lr_0 = 6.9302e-04
Loss = 1.7725e-01, PNorm = 64.0588, GNorm = 0.5975, lr_0 = 6.9254e-04
Loss = 1.8378e-01, PNorm = 64.0853, GNorm = 2.2349, lr_0 = 6.9207e-04
Loss = 1.7145e-01, PNorm = 64.1149, GNorm = 1.0940, lr_0 = 6.9159e-04
Loss = 1.5581e-01, PNorm = 64.1379, GNorm = 1.0726, lr_0 = 6.9112e-04
Loss = 1.7733e-01, PNorm = 64.1572, GNorm = 0.8114, lr_0 = 6.9065e-04
Loss = 1.5518e-01, PNorm = 64.1833, GNorm = 1.7013, lr_0 = 6.9017e-04
Loss = 1.6348e-01, PNorm = 64.2043, GNorm = 1.6054, lr_0 = 6.8970e-04
Loss = 1.7203e-01, PNorm = 64.2260, GNorm = 0.8486, lr_0 = 6.8923e-04
Loss = 1.6053e-01, PNorm = 64.2506, GNorm = 1.0591, lr_0 = 6.8876e-04
Loss = 1.9094e-01, PNorm = 64.2815, GNorm = 0.7950, lr_0 = 6.8828e-04
Loss = 1.5925e-01, PNorm = 64.3082, GNorm = 0.7577, lr_0 = 6.8781e-04
Loss = 1.7246e-01, PNorm = 64.3310, GNorm = 1.3315, lr_0 = 6.8734e-04
Loss = 1.7975e-01, PNorm = 64.3520, GNorm = 1.0268, lr_0 = 6.8687e-04
Loss = 1.5010e-01, PNorm = 64.3733, GNorm = 0.6214, lr_0 = 6.8640e-04
Loss = 1.4741e-01, PNorm = 64.3935, GNorm = 0.9862, lr_0 = 6.8593e-04
Loss = 1.7880e-01, PNorm = 64.4151, GNorm = 0.6192, lr_0 = 6.8546e-04
Loss = 1.8586e-01, PNorm = 64.4374, GNorm = 1.4183, lr_0 = 6.8499e-04
Loss = 1.5799e-01, PNorm = 64.4509, GNorm = 1.7073, lr_0 = 6.8452e-04
Loss = 1.8606e-01, PNorm = 64.4671, GNorm = 0.7262, lr_0 = 6.8405e-04
Loss = 1.6255e-01, PNorm = 64.4833, GNorm = 1.0275, lr_0 = 6.8358e-04
Loss = 1.6984e-01, PNorm = 64.5059, GNorm = 1.1808, lr_0 = 6.8312e-04
Loss = 1.6583e-01, PNorm = 64.5238, GNorm = 0.6095, lr_0 = 6.8265e-04
Loss = 1.5346e-01, PNorm = 64.5462, GNorm = 1.3116, lr_0 = 6.8218e-04
Loss = 1.4981e-01, PNorm = 64.5666, GNorm = 0.5532, lr_0 = 6.8171e-04
Loss = 1.5936e-01, PNorm = 64.5858, GNorm = 0.8255, lr_0 = 6.8125e-04
Loss = 1.6255e-01, PNorm = 64.6067, GNorm = 0.6685, lr_0 = 6.8078e-04
Loss = 1.4069e-01, PNorm = 64.6236, GNorm = 1.3591, lr_0 = 6.8031e-04
Loss = 1.6632e-01, PNorm = 64.6428, GNorm = 0.7800, lr_0 = 6.7985e-04
Loss = 1.6404e-01, PNorm = 64.6732, GNorm = 0.9026, lr_0 = 6.7938e-04
Loss = 1.8317e-01, PNorm = 64.6941, GNorm = 1.1185, lr_0 = 6.7892e-04
Loss = 1.6458e-01, PNorm = 64.7127, GNorm = 1.1057, lr_0 = 6.7845e-04
Loss = 1.6391e-01, PNorm = 64.7462, GNorm = 1.0689, lr_0 = 6.7799e-04
Loss = 1.6454e-01, PNorm = 64.7672, GNorm = 1.2066, lr_0 = 6.7752e-04
Loss = 1.7141e-01, PNorm = 64.7908, GNorm = 0.7616, lr_0 = 6.7706e-04
Loss = 1.8206e-01, PNorm = 64.8058, GNorm = 1.0108, lr_0 = 6.7659e-04
Loss = 1.9354e-01, PNorm = 64.8396, GNorm = 1.9002, lr_0 = 6.7613e-04
Loss = 1.9824e-01, PNorm = 64.8733, GNorm = 0.9845, lr_0 = 6.7567e-04
Loss = 1.5671e-01, PNorm = 64.9048, GNorm = 0.7254, lr_0 = 6.7520e-04
Loss = 1.5783e-01, PNorm = 64.9242, GNorm = 0.5518, lr_0 = 6.7474e-04
Loss = 1.8095e-01, PNorm = 64.9455, GNorm = 0.6309, lr_0 = 6.7428e-04
Loss = 1.7019e-01, PNorm = 64.9750, GNorm = 1.2437, lr_0 = 6.7382e-04
Loss = 1.7853e-01, PNorm = 65.0070, GNorm = 0.9597, lr_0 = 6.7335e-04
Loss = 1.8496e-01, PNorm = 65.0327, GNorm = 1.2179, lr_0 = 6.7289e-04
Loss = 1.6010e-01, PNorm = 65.0543, GNorm = 1.1047, lr_0 = 6.7243e-04
Loss = 1.9911e-01, PNorm = 65.0841, GNorm = 1.4646, lr_0 = 6.7197e-04
Loss = 1.6549e-01, PNorm = 65.1047, GNorm = 0.7100, lr_0 = 6.7151e-04
Loss = 1.5785e-01, PNorm = 65.1270, GNorm = 0.8306, lr_0 = 6.7105e-04
Loss = 1.9288e-01, PNorm = 65.1462, GNorm = 1.8336, lr_0 = 6.7059e-04
Loss = 2.0265e-01, PNorm = 65.1698, GNorm = 0.9056, lr_0 = 6.7013e-04
Loss = 1.7312e-01, PNorm = 65.2003, GNorm = 1.4445, lr_0 = 6.6967e-04
Loss = 1.6598e-01, PNorm = 65.2236, GNorm = 1.0151, lr_0 = 6.6921e-04
Loss = 1.5629e-01, PNorm = 65.2470, GNorm = 0.7744, lr_0 = 6.6876e-04
Loss = 1.5330e-01, PNorm = 65.2693, GNorm = 0.7513, lr_0 = 6.6830e-04
Loss = 1.7508e-01, PNorm = 65.2941, GNorm = 0.8755, lr_0 = 6.6784e-04
Loss = 1.5172e-01, PNorm = 65.3204, GNorm = 0.5762, lr_0 = 6.6738e-04
Loss = 2.0108e-01, PNorm = 65.3405, GNorm = 1.7025, lr_0 = 6.6693e-04
Loss = 1.5560e-01, PNorm = 65.3601, GNorm = 0.7352, lr_0 = 6.6647e-04
Loss = 1.5613e-01, PNorm = 65.3703, GNorm = 0.6903, lr_0 = 6.6601e-04
Loss = 1.8224e-01, PNorm = 65.3875, GNorm = 0.7195, lr_0 = 6.6556e-04
Loss = 1.3473e-01, PNorm = 65.4106, GNorm = 0.6746, lr_0 = 6.6510e-04
Loss = 1.5834e-01, PNorm = 65.4263, GNorm = 0.5364, lr_0 = 6.6464e-04
Loss = 1.6918e-01, PNorm = 65.4492, GNorm = 0.6173, lr_0 = 6.6419e-04
Loss = 1.8394e-01, PNorm = 65.4677, GNorm = 0.8151, lr_0 = 6.6373e-04
Loss = 1.7840e-01, PNorm = 65.4936, GNorm = 0.9453, lr_0 = 6.6328e-04
Loss = 1.7267e-01, PNorm = 65.5150, GNorm = 0.5914, lr_0 = 6.6282e-04
Validation mae = 0.252590
Epoch 7
Loss = 1.5493e-01, PNorm = 65.5371, GNorm = 0.6889, lr_0 = 6.6237e-04
Loss = 1.5469e-01, PNorm = 65.5579, GNorm = 0.5994, lr_0 = 6.6192e-04
Loss = 1.4112e-01, PNorm = 65.5843, GNorm = 0.5984, lr_0 = 6.6146e-04
Loss = 1.6198e-01, PNorm = 65.6061, GNorm = 0.6629, lr_0 = 6.6101e-04
Loss = 1.5474e-01, PNorm = 65.6195, GNorm = 0.7312, lr_0 = 6.6056e-04
Loss = 1.3081e-01, PNorm = 65.6395, GNorm = 0.5795, lr_0 = 6.6011e-04
Loss = 1.5679e-01, PNorm = 65.6548, GNorm = 0.8373, lr_0 = 6.5965e-04
Loss = 1.5692e-01, PNorm = 65.6768, GNorm = 0.9497, lr_0 = 6.5920e-04
Loss = 1.7451e-01, PNorm = 65.6966, GNorm = 1.4309, lr_0 = 6.5875e-04
Loss = 1.3476e-01, PNorm = 65.7225, GNorm = 0.6383, lr_0 = 6.5830e-04
Loss = 1.5779e-01, PNorm = 65.7354, GNorm = 1.5167, lr_0 = 6.5785e-04
Loss = 1.4617e-01, PNorm = 65.7551, GNorm = 0.7326, lr_0 = 6.5740e-04
Loss = 1.5206e-01, PNorm = 65.7760, GNorm = 1.1663, lr_0 = 6.5695e-04
Loss = 1.4429e-01, PNorm = 65.7983, GNorm = 0.7923, lr_0 = 6.5650e-04
Loss = 1.5589e-01, PNorm = 65.8229, GNorm = 1.7909, lr_0 = 6.5605e-04
Loss = 1.6988e-01, PNorm = 65.8492, GNorm = 1.6070, lr_0 = 6.5560e-04
Loss = 1.5977e-01, PNorm = 65.8729, GNorm = 0.8553, lr_0 = 6.5515e-04
Loss = 1.7335e-01, PNorm = 65.9004, GNorm = 2.3174, lr_0 = 6.5470e-04
Loss = 1.7354e-01, PNorm = 65.9143, GNorm = 0.8497, lr_0 = 6.5425e-04
Loss = 1.5301e-01, PNorm = 65.9403, GNorm = 0.7597, lr_0 = 6.5380e-04
Loss = 1.4699e-01, PNorm = 65.9644, GNorm = 0.6579, lr_0 = 6.5335e-04
Loss = 1.7099e-01, PNorm = 65.9873, GNorm = 0.8305, lr_0 = 6.5291e-04
Loss = 1.4200e-01, PNorm = 66.0074, GNorm = 2.0291, lr_0 = 6.5246e-04
Loss = 1.7069e-01, PNorm = 66.0293, GNorm = 1.7577, lr_0 = 6.5201e-04
Loss = 1.7540e-01, PNorm = 66.0624, GNorm = 0.8091, lr_0 = 6.5157e-04
Loss = 1.6400e-01, PNorm = 66.0793, GNorm = 0.6457, lr_0 = 6.5112e-04
Loss = 1.6204e-01, PNorm = 66.1013, GNorm = 0.7153, lr_0 = 6.5067e-04
Loss = 1.4271e-01, PNorm = 66.1274, GNorm = 0.9377, lr_0 = 6.5023e-04
Loss = 1.5182e-01, PNorm = 66.1498, GNorm = 1.0236, lr_0 = 6.4978e-04
Loss = 1.4885e-01, PNorm = 66.1718, GNorm = 0.6421, lr_0 = 6.4934e-04
Loss = 1.8179e-01, PNorm = 66.1893, GNorm = 0.8935, lr_0 = 6.4889e-04
Loss = 1.6194e-01, PNorm = 66.2105, GNorm = 0.5817, lr_0 = 6.4845e-04
Loss = 1.6101e-01, PNorm = 66.2368, GNorm = 0.9499, lr_0 = 6.4800e-04
Loss = 1.4936e-01, PNorm = 66.2610, GNorm = 0.9294, lr_0 = 6.4756e-04
Loss = 1.7216e-01, PNorm = 66.2867, GNorm = 2.3399, lr_0 = 6.4712e-04
Loss = 1.7461e-01, PNorm = 66.3128, GNorm = 1.2846, lr_0 = 6.4667e-04
Loss = 1.4893e-01, PNorm = 66.3411, GNorm = 1.0514, lr_0 = 6.4623e-04
Loss = 1.6499e-01, PNorm = 66.3581, GNorm = 1.3829, lr_0 = 6.4579e-04
Loss = 1.6022e-01, PNorm = 66.3791, GNorm = 0.8095, lr_0 = 6.4534e-04
Loss = 1.5208e-01, PNorm = 66.4038, GNorm = 0.8352, lr_0 = 6.4490e-04
Loss = 1.5907e-01, PNorm = 66.4209, GNorm = 0.7687, lr_0 = 6.4446e-04
Loss = 1.3996e-01, PNorm = 66.4350, GNorm = 0.7542, lr_0 = 6.4402e-04
Loss = 1.5120e-01, PNorm = 66.4542, GNorm = 0.6678, lr_0 = 6.4358e-04
Loss = 1.6698e-01, PNorm = 66.4696, GNorm = 1.0167, lr_0 = 6.4314e-04
Loss = 1.4891e-01, PNorm = 66.4819, GNorm = 1.1932, lr_0 = 6.4270e-04
Loss = 1.5313e-01, PNorm = 66.5027, GNorm = 0.7616, lr_0 = 6.4226e-04
Loss = 1.4753e-01, PNorm = 66.5138, GNorm = 0.6476, lr_0 = 6.4182e-04
Loss = 1.4160e-01, PNorm = 66.5242, GNorm = 0.6610, lr_0 = 6.4138e-04
Loss = 1.6054e-01, PNorm = 66.5335, GNorm = 0.9743, lr_0 = 6.4094e-04
Loss = 1.6760e-01, PNorm = 66.5588, GNorm = 0.7086, lr_0 = 6.4050e-04
Loss = 1.7477e-01, PNorm = 66.5836, GNorm = 0.6423, lr_0 = 6.4006e-04
Loss = 1.6639e-01, PNorm = 66.6115, GNorm = 1.1040, lr_0 = 6.3962e-04
Loss = 1.6516e-01, PNorm = 66.6374, GNorm = 0.6188, lr_0 = 6.3918e-04
Loss = 1.6644e-01, PNorm = 66.6639, GNorm = 1.3868, lr_0 = 6.3874e-04
Loss = 1.7693e-01, PNorm = 66.6817, GNorm = 1.0045, lr_0 = 6.3831e-04
Loss = 1.6093e-01, PNorm = 66.7047, GNorm = 0.8116, lr_0 = 6.3787e-04
Loss = 1.4562e-01, PNorm = 66.7234, GNorm = 0.6855, lr_0 = 6.3743e-04
Loss = 1.5099e-01, PNorm = 66.7453, GNorm = 0.7802, lr_0 = 6.3700e-04
Loss = 1.7379e-01, PNorm = 66.7648, GNorm = 0.7193, lr_0 = 6.3656e-04
Loss = 1.3839e-01, PNorm = 66.7865, GNorm = 0.6945, lr_0 = 6.3612e-04
Loss = 1.4727e-01, PNorm = 66.7977, GNorm = 0.7726, lr_0 = 6.3569e-04
Loss = 1.6219e-01, PNorm = 66.8172, GNorm = 0.6983, lr_0 = 6.3525e-04
Loss = 1.5972e-01, PNorm = 66.8399, GNorm = 1.0725, lr_0 = 6.3482e-04
Loss = 1.6554e-01, PNorm = 66.8678, GNorm = 1.1519, lr_0 = 6.3438e-04
Loss = 1.4796e-01, PNorm = 66.8911, GNorm = 0.8985, lr_0 = 6.3395e-04
Loss = 1.4718e-01, PNorm = 66.9034, GNorm = 0.5312, lr_0 = 6.3351e-04
Loss = 1.5833e-01, PNorm = 66.9154, GNorm = 0.7422, lr_0 = 6.3308e-04
Loss = 1.5252e-01, PNorm = 66.9309, GNorm = 1.6514, lr_0 = 6.3265e-04
Loss = 1.5432e-01, PNorm = 66.9488, GNorm = 1.7587, lr_0 = 6.3221e-04
Loss = 1.6387e-01, PNorm = 66.9624, GNorm = 0.8333, lr_0 = 6.3178e-04
Loss = 1.5440e-01, PNorm = 66.9885, GNorm = 0.6024, lr_0 = 6.3135e-04
Loss = 1.4814e-01, PNorm = 67.0049, GNorm = 1.1930, lr_0 = 6.3091e-04
Loss = 1.6758e-01, PNorm = 67.0302, GNorm = 1.0149, lr_0 = 6.3048e-04
Loss = 1.5128e-01, PNorm = 67.0515, GNorm = 1.1255, lr_0 = 6.3005e-04
Loss = 1.5071e-01, PNorm = 67.0652, GNorm = 0.4685, lr_0 = 6.2962e-04
Loss = 1.3656e-01, PNorm = 67.0795, GNorm = 0.6883, lr_0 = 6.2919e-04
Loss = 1.5838e-01, PNorm = 67.0992, GNorm = 0.8030, lr_0 = 6.2876e-04
Loss = 1.3783e-01, PNorm = 67.1181, GNorm = 1.0651, lr_0 = 6.2833e-04
Loss = 1.5829e-01, PNorm = 67.1381, GNorm = 1.1401, lr_0 = 6.2789e-04
Loss = 1.5403e-01, PNorm = 67.1644, GNorm = 1.3036, lr_0 = 6.2746e-04
Loss = 1.4209e-01, PNorm = 67.1964, GNorm = 0.6634, lr_0 = 6.2703e-04
Loss = 1.9737e-01, PNorm = 67.2170, GNorm = 0.8739, lr_0 = 6.2661e-04
Loss = 1.5166e-01, PNorm = 67.2350, GNorm = 0.8105, lr_0 = 6.2618e-04
Loss = 1.3539e-01, PNorm = 67.2430, GNorm = 0.6544, lr_0 = 6.2575e-04
Loss = 1.7314e-01, PNorm = 67.2640, GNorm = 0.9576, lr_0 = 6.2532e-04
Loss = 1.6626e-01, PNorm = 67.2872, GNorm = 0.9062, lr_0 = 6.2489e-04
Loss = 1.5418e-01, PNorm = 67.3137, GNorm = 0.9374, lr_0 = 6.2446e-04
Loss = 1.5287e-01, PNorm = 67.3386, GNorm = 0.7482, lr_0 = 6.2403e-04
Loss = 1.9103e-01, PNorm = 67.3578, GNorm = 0.8202, lr_0 = 6.2361e-04
Loss = 1.5327e-01, PNorm = 67.3782, GNorm = 0.8041, lr_0 = 6.2318e-04
Loss = 1.6675e-01, PNorm = 67.4007, GNorm = 0.8114, lr_0 = 6.2275e-04
Loss = 1.3396e-01, PNorm = 67.4213, GNorm = 0.9941, lr_0 = 6.2233e-04
Loss = 1.5743e-01, PNorm = 67.4402, GNorm = 0.5879, lr_0 = 6.2190e-04
Loss = 1.5912e-01, PNorm = 67.4587, GNorm = 0.8906, lr_0 = 6.2147e-04
Loss = 1.7948e-01, PNorm = 67.4784, GNorm = 0.5303, lr_0 = 6.2105e-04
Loss = 1.8898e-01, PNorm = 67.4874, GNorm = 0.7712, lr_0 = 6.2062e-04
Loss = 1.4910e-01, PNorm = 67.5068, GNorm = 1.2287, lr_0 = 6.2020e-04
Loss = 1.6939e-01, PNorm = 67.5188, GNorm = 0.7998, lr_0 = 6.1977e-04
Loss = 1.6958e-01, PNorm = 67.5379, GNorm = 0.6134, lr_0 = 6.1935e-04
Loss = 1.5570e-01, PNorm = 67.5548, GNorm = 0.9990, lr_0 = 6.1892e-04
Loss = 1.6452e-01, PNorm = 67.5738, GNorm = 1.1185, lr_0 = 6.1850e-04
Loss = 1.4878e-01, PNorm = 67.5895, GNorm = 0.7634, lr_0 = 6.1808e-04
Loss = 1.8071e-01, PNorm = 67.6075, GNorm = 0.7978, lr_0 = 6.1765e-04
Loss = 1.5926e-01, PNorm = 67.6266, GNorm = 0.6308, lr_0 = 6.1723e-04
Loss = 1.5574e-01, PNorm = 67.6483, GNorm = 0.8709, lr_0 = 6.1681e-04
Loss = 1.5092e-01, PNorm = 67.6695, GNorm = 1.4830, lr_0 = 6.1638e-04
Loss = 1.6276e-01, PNorm = 67.6901, GNorm = 0.4917, lr_0 = 6.1596e-04
Loss = 1.6208e-01, PNorm = 67.7149, GNorm = 0.7228, lr_0 = 6.1554e-04
Loss = 1.5047e-01, PNorm = 67.7312, GNorm = 1.0131, lr_0 = 6.1512e-04
Loss = 1.7586e-01, PNorm = 67.7493, GNorm = 1.1583, lr_0 = 6.1470e-04
Loss = 1.7157e-01, PNorm = 67.7660, GNorm = 1.2270, lr_0 = 6.1428e-04
Loss = 1.4499e-01, PNorm = 67.7839, GNorm = 0.6858, lr_0 = 6.1385e-04
Loss = 1.5899e-01, PNorm = 67.8033, GNorm = 0.7289, lr_0 = 6.1343e-04
Loss = 1.6040e-01, PNorm = 67.8228, GNorm = 0.9771, lr_0 = 6.1301e-04
Loss = 1.5858e-01, PNorm = 67.8456, GNorm = 0.7730, lr_0 = 6.1259e-04
Loss = 1.6263e-01, PNorm = 67.8683, GNorm = 1.0172, lr_0 = 6.1217e-04
Loss = 1.4694e-01, PNorm = 67.8866, GNorm = 0.5519, lr_0 = 6.1175e-04
Loss = 1.5815e-01, PNorm = 67.9029, GNorm = 1.3606, lr_0 = 6.1134e-04
Loss = 1.5510e-01, PNorm = 67.9179, GNorm = 0.5763, lr_0 = 6.1092e-04
Loss = 1.6696e-01, PNorm = 67.9451, GNorm = 0.8401, lr_0 = 6.1050e-04
Validation mae = 0.248788
Epoch 8
Loss = 1.4688e-01, PNorm = 67.9728, GNorm = 0.7607, lr_0 = 6.1008e-04
Loss = 1.5541e-01, PNorm = 67.9999, GNorm = 1.6857, lr_0 = 6.0966e-04
Loss = 1.5692e-01, PNorm = 68.0227, GNorm = 1.3294, lr_0 = 6.0924e-04
Loss = 1.5712e-01, PNorm = 68.0504, GNorm = 0.4509, lr_0 = 6.0883e-04
Loss = 1.4238e-01, PNorm = 68.0738, GNorm = 0.7686, lr_0 = 6.0841e-04
Loss = 1.4978e-01, PNorm = 68.0880, GNorm = 1.3165, lr_0 = 6.0799e-04
Loss = 1.3859e-01, PNorm = 68.1059, GNorm = 0.5073, lr_0 = 6.0758e-04
Loss = 1.2793e-01, PNorm = 68.1175, GNorm = 0.9206, lr_0 = 6.0716e-04
Loss = 1.3274e-01, PNorm = 68.1373, GNorm = 0.7055, lr_0 = 6.0674e-04
Loss = 1.3595e-01, PNorm = 68.1532, GNorm = 0.4632, lr_0 = 6.0633e-04
Loss = 1.3374e-01, PNorm = 68.1716, GNorm = 1.0862, lr_0 = 6.0591e-04
Loss = 1.3368e-01, PNorm = 68.1887, GNorm = 0.7848, lr_0 = 6.0550e-04
Loss = 1.4769e-01, PNorm = 68.2066, GNorm = 0.6922, lr_0 = 6.0508e-04
Loss = 1.4355e-01, PNorm = 68.2241, GNorm = 0.8197, lr_0 = 6.0467e-04
Loss = 1.6857e-01, PNorm = 68.2436, GNorm = 1.1561, lr_0 = 6.0425e-04
Loss = 1.3933e-01, PNorm = 68.2621, GNorm = 0.6892, lr_0 = 6.0384e-04
Loss = 1.7817e-01, PNorm = 68.2908, GNorm = 1.5132, lr_0 = 6.0343e-04
Loss = 1.5227e-01, PNorm = 68.3168, GNorm = 1.0304, lr_0 = 6.0301e-04
Loss = 1.7212e-01, PNorm = 68.3388, GNorm = 1.3565, lr_0 = 6.0260e-04
Loss = 1.4195e-01, PNorm = 68.3553, GNorm = 0.5012, lr_0 = 6.0219e-04
Loss = 1.3218e-01, PNorm = 68.3723, GNorm = 1.3685, lr_0 = 6.0178e-04
Loss = 1.5570e-01, PNorm = 68.3874, GNorm = 0.6197, lr_0 = 6.0136e-04
Loss = 1.4746e-01, PNorm = 68.4014, GNorm = 0.7150, lr_0 = 6.0095e-04
Loss = 1.5019e-01, PNorm = 68.4196, GNorm = 0.9662, lr_0 = 6.0054e-04
Loss = 1.5757e-01, PNorm = 68.4454, GNorm = 1.7008, lr_0 = 6.0013e-04
Loss = 1.6012e-01, PNorm = 68.4716, GNorm = 0.9075, lr_0 = 5.9972e-04
Loss = 1.4609e-01, PNorm = 68.4975, GNorm = 0.7153, lr_0 = 5.9931e-04
Loss = 1.7253e-01, PNorm = 68.5221, GNorm = 0.7569, lr_0 = 5.9890e-04
Loss = 1.3360e-01, PNorm = 68.5426, GNorm = 0.5876, lr_0 = 5.9849e-04
Loss = 1.5528e-01, PNorm = 68.5658, GNorm = 0.9454, lr_0 = 5.9808e-04
Loss = 1.6492e-01, PNorm = 68.5869, GNorm = 0.5813, lr_0 = 5.9767e-04
Loss = 1.3971e-01, PNorm = 68.6069, GNorm = 0.5887, lr_0 = 5.9726e-04
Loss = 1.4184e-01, PNorm = 68.6282, GNorm = 0.7379, lr_0 = 5.9685e-04
Loss = 1.4997e-01, PNorm = 68.6503, GNorm = 1.4212, lr_0 = 5.9644e-04
Loss = 1.6028e-01, PNorm = 68.6755, GNorm = 1.0182, lr_0 = 5.9603e-04
Loss = 1.5665e-01, PNorm = 68.6910, GNorm = 0.7801, lr_0 = 5.9562e-04
Loss = 1.6022e-01, PNorm = 68.7093, GNorm = 0.9158, lr_0 = 5.9521e-04
Loss = 1.4068e-01, PNorm = 68.7205, GNorm = 1.0054, lr_0 = 5.9481e-04
Loss = 1.6021e-01, PNorm = 68.7386, GNorm = 0.8325, lr_0 = 5.9440e-04
Loss = 1.4704e-01, PNorm = 68.7591, GNorm = 0.5654, lr_0 = 5.9399e-04
Loss = 1.4618e-01, PNorm = 68.7757, GNorm = 0.8227, lr_0 = 5.9358e-04
Loss = 1.3870e-01, PNorm = 68.8053, GNorm = 0.7447, lr_0 = 5.9318e-04
Loss = 1.5958e-01, PNorm = 68.8268, GNorm = 0.6418, lr_0 = 5.9277e-04
Loss = 1.6083e-01, PNorm = 68.8448, GNorm = 1.5233, lr_0 = 5.9236e-04
Loss = 1.5983e-01, PNorm = 68.8622, GNorm = 0.9928, lr_0 = 5.9196e-04
Loss = 1.4353e-01, PNorm = 68.8794, GNorm = 0.6649, lr_0 = 5.9155e-04
Loss = 1.3966e-01, PNorm = 68.8984, GNorm = 0.6644, lr_0 = 5.9115e-04
Loss = 1.7664e-01, PNorm = 68.9115, GNorm = 1.8504, lr_0 = 5.9074e-04
Loss = 1.4445e-01, PNorm = 68.9286, GNorm = 1.1958, lr_0 = 5.9034e-04
Loss = 1.5624e-01, PNorm = 68.9510, GNorm = 0.8687, lr_0 = 5.8993e-04
Loss = 1.5566e-01, PNorm = 68.9720, GNorm = 1.0364, lr_0 = 5.8953e-04
Loss = 1.4367e-01, PNorm = 68.9918, GNorm = 0.5635, lr_0 = 5.8913e-04
Loss = 1.3926e-01, PNorm = 69.0081, GNorm = 1.0000, lr_0 = 5.8872e-04
Loss = 1.5184e-01, PNorm = 69.0241, GNorm = 0.6823, lr_0 = 5.8832e-04
Loss = 1.4928e-01, PNorm = 69.0441, GNorm = 1.0774, lr_0 = 5.8792e-04
Loss = 1.4648e-01, PNorm = 69.0621, GNorm = 0.8337, lr_0 = 5.8751e-04
Loss = 1.5005e-01, PNorm = 69.0859, GNorm = 1.2941, lr_0 = 5.8711e-04
Loss = 1.4163e-01, PNorm = 69.1081, GNorm = 1.4473, lr_0 = 5.8671e-04
Loss = 1.7366e-01, PNorm = 69.1247, GNorm = 1.8544, lr_0 = 5.8631e-04
Loss = 1.8518e-01, PNorm = 69.1508, GNorm = 1.1166, lr_0 = 5.8591e-04
Loss = 1.5954e-01, PNorm = 69.1668, GNorm = 0.7977, lr_0 = 5.8550e-04
Loss = 1.2546e-01, PNorm = 69.1852, GNorm = 0.5355, lr_0 = 5.8510e-04
Loss = 1.3413e-01, PNorm = 69.2034, GNorm = 0.5073, lr_0 = 5.8470e-04
Loss = 1.4213e-01, PNorm = 69.2226, GNorm = 0.8203, lr_0 = 5.8430e-04
Loss = 1.2684e-01, PNorm = 69.2382, GNorm = 0.5281, lr_0 = 5.8390e-04
Loss = 1.5891e-01, PNorm = 69.2534, GNorm = 0.8051, lr_0 = 5.8350e-04
Loss = 1.4020e-01, PNorm = 69.2703, GNorm = 0.6743, lr_0 = 5.8310e-04
Loss = 1.5115e-01, PNorm = 69.2876, GNorm = 0.6069, lr_0 = 5.8270e-04
Loss = 1.3361e-01, PNorm = 69.2979, GNorm = 0.6313, lr_0 = 5.8230e-04
Loss = 1.4797e-01, PNorm = 69.3110, GNorm = 0.9321, lr_0 = 5.8190e-04
Loss = 1.4395e-01, PNorm = 69.3276, GNorm = 0.6847, lr_0 = 5.8151e-04
Loss = 1.4059e-01, PNorm = 69.3500, GNorm = 0.6325, lr_0 = 5.8111e-04
Loss = 1.4019e-01, PNorm = 69.3720, GNorm = 0.8320, lr_0 = 5.8071e-04
Loss = 1.4832e-01, PNorm = 69.4019, GNorm = 1.1265, lr_0 = 5.8031e-04
Loss = 1.5852e-01, PNorm = 69.4251, GNorm = 0.7985, lr_0 = 5.7991e-04
Loss = 1.4608e-01, PNorm = 69.4470, GNorm = 1.1558, lr_0 = 5.7952e-04
Loss = 1.6263e-01, PNorm = 69.4594, GNorm = 0.6454, lr_0 = 5.7912e-04
Loss = 1.5376e-01, PNorm = 69.4739, GNorm = 0.6004, lr_0 = 5.7872e-04
Loss = 1.2346e-01, PNorm = 69.4905, GNorm = 0.7638, lr_0 = 5.7833e-04
Loss = 1.6719e-01, PNorm = 69.5028, GNorm = 0.7464, lr_0 = 5.7793e-04
Loss = 1.5492e-01, PNorm = 69.5222, GNorm = 0.7479, lr_0 = 5.7753e-04
Loss = 1.4630e-01, PNorm = 69.5360, GNorm = 0.9476, lr_0 = 5.7714e-04
Loss = 1.4402e-01, PNorm = 69.5485, GNorm = 0.7915, lr_0 = 5.7674e-04
Loss = 1.4197e-01, PNorm = 69.5665, GNorm = 0.6189, lr_0 = 5.7635e-04
Loss = 1.3479e-01, PNorm = 69.5860, GNorm = 0.8815, lr_0 = 5.7595e-04
Loss = 1.2675e-01, PNorm = 69.6010, GNorm = 0.5088, lr_0 = 5.7556e-04
Loss = 1.3770e-01, PNorm = 69.6122, GNorm = 1.0042, lr_0 = 5.7516e-04
Loss = 1.4941e-01, PNorm = 69.6285, GNorm = 0.6200, lr_0 = 5.7477e-04
Loss = 1.3976e-01, PNorm = 69.6462, GNorm = 0.9800, lr_0 = 5.7438e-04
Loss = 1.8717e-01, PNorm = 69.6633, GNorm = 1.0642, lr_0 = 5.7398e-04
Loss = 1.4080e-01, PNorm = 69.6858, GNorm = 0.7728, lr_0 = 5.7359e-04
Loss = 1.5358e-01, PNorm = 69.7015, GNorm = 0.8048, lr_0 = 5.7320e-04
Loss = 1.5450e-01, PNorm = 69.7125, GNorm = 0.9886, lr_0 = 5.7280e-04
Loss = 1.5204e-01, PNorm = 69.7342, GNorm = 1.2152, lr_0 = 5.7241e-04
Loss = 1.3863e-01, PNorm = 69.7473, GNorm = 1.0105, lr_0 = 5.7202e-04
Loss = 1.5685e-01, PNorm = 69.7669, GNorm = 0.8491, lr_0 = 5.7163e-04
Loss = 1.5744e-01, PNorm = 69.7856, GNorm = 0.6155, lr_0 = 5.7124e-04
Loss = 1.4134e-01, PNorm = 69.8047, GNorm = 0.6100, lr_0 = 5.7084e-04
Loss = 1.4221e-01, PNorm = 69.8172, GNorm = 1.1578, lr_0 = 5.7045e-04
Loss = 1.4558e-01, PNorm = 69.8336, GNorm = 0.7591, lr_0 = 5.7006e-04
Loss = 1.4875e-01, PNorm = 69.8502, GNorm = 0.6545, lr_0 = 5.6967e-04
Loss = 1.6306e-01, PNorm = 69.8690, GNorm = 0.7545, lr_0 = 5.6928e-04
Loss = 1.4531e-01, PNorm = 69.8893, GNorm = 0.8072, lr_0 = 5.6889e-04
Loss = 1.5437e-01, PNorm = 69.9088, GNorm = 0.9224, lr_0 = 5.6850e-04
Loss = 1.6778e-01, PNorm = 69.9234, GNorm = 0.5598, lr_0 = 5.6811e-04
Loss = 1.3907e-01, PNorm = 69.9363, GNorm = 0.4849, lr_0 = 5.6772e-04
Loss = 1.6021e-01, PNorm = 69.9488, GNorm = 0.7957, lr_0 = 5.6733e-04
Loss = 1.4719e-01, PNorm = 69.9677, GNorm = 0.7907, lr_0 = 5.6695e-04
Loss = 1.3896e-01, PNorm = 69.9890, GNorm = 0.7886, lr_0 = 5.6656e-04
Loss = 1.7833e-01, PNorm = 70.0043, GNorm = 1.5895, lr_0 = 5.6617e-04
Loss = 1.5576e-01, PNorm = 70.0303, GNorm = 1.2881, lr_0 = 5.6578e-04
Loss = 1.6268e-01, PNorm = 70.0504, GNorm = 1.0693, lr_0 = 5.6539e-04
Loss = 1.3507e-01, PNorm = 70.0616, GNorm = 1.0537, lr_0 = 5.6501e-04
Loss = 1.3539e-01, PNorm = 70.0747, GNorm = 0.6012, lr_0 = 5.6462e-04
Loss = 1.3681e-01, PNorm = 70.0905, GNorm = 0.5712, lr_0 = 5.6423e-04
Loss = 1.4403e-01, PNorm = 70.1069, GNorm = 1.0470, lr_0 = 5.6385e-04
Loss = 1.6844e-01, PNorm = 70.1260, GNorm = 0.7380, lr_0 = 5.6346e-04
Loss = 1.3936e-01, PNorm = 70.1411, GNorm = 0.7397, lr_0 = 5.6307e-04
Loss = 1.6371e-01, PNorm = 70.1581, GNorm = 1.0185, lr_0 = 5.6269e-04
Loss = 1.5061e-01, PNorm = 70.1663, GNorm = 0.6839, lr_0 = 5.6230e-04
Validation mae = 0.244866
Epoch 9
Loss = 1.4530e-01, PNorm = 70.1816, GNorm = 0.8030, lr_0 = 5.6192e-04
Loss = 1.3471e-01, PNorm = 70.2024, GNorm = 0.7473, lr_0 = 5.6153e-04
Loss = 1.4039e-01, PNorm = 70.2262, GNorm = 0.6216, lr_0 = 5.6115e-04
Loss = 1.1053e-01, PNorm = 70.2460, GNorm = 0.5270, lr_0 = 5.6076e-04
Loss = 1.2475e-01, PNorm = 70.2620, GNorm = 0.5821, lr_0 = 5.6038e-04
Loss = 1.4128e-01, PNorm = 70.2754, GNorm = 0.6768, lr_0 = 5.6000e-04
Loss = 1.2283e-01, PNorm = 70.2911, GNorm = 0.5439, lr_0 = 5.5961e-04
Loss = 1.3208e-01, PNorm = 70.3096, GNorm = 0.6450, lr_0 = 5.5923e-04
Loss = 1.2962e-01, PNorm = 70.3269, GNorm = 0.8498, lr_0 = 5.5885e-04
Loss = 1.3873e-01, PNorm = 70.3462, GNorm = 0.7880, lr_0 = 5.5846e-04
Loss = 1.2496e-01, PNorm = 70.3659, GNorm = 0.6081, lr_0 = 5.5808e-04
Loss = 1.3156e-01, PNorm = 70.3820, GNorm = 0.5979, lr_0 = 5.5770e-04
Loss = 1.4548e-01, PNorm = 70.4008, GNorm = 0.6082, lr_0 = 5.5732e-04
Loss = 1.2889e-01, PNorm = 70.4210, GNorm = 0.8983, lr_0 = 5.5693e-04
Loss = 1.4052e-01, PNorm = 70.4383, GNorm = 0.5944, lr_0 = 5.5655e-04
Loss = 1.3672e-01, PNorm = 70.4533, GNorm = 0.5706, lr_0 = 5.5617e-04
Loss = 1.3039e-01, PNorm = 70.4713, GNorm = 0.9038, lr_0 = 5.5579e-04
Loss = 1.3357e-01, PNorm = 70.4842, GNorm = 0.8284, lr_0 = 5.5541e-04
Loss = 1.2693e-01, PNorm = 70.4982, GNorm = 0.6596, lr_0 = 5.5503e-04
Loss = 1.3070e-01, PNorm = 70.5153, GNorm = 0.8799, lr_0 = 5.5465e-04
Loss = 1.3856e-01, PNorm = 70.5335, GNorm = 1.1137, lr_0 = 5.5427e-04
Loss = 1.3998e-01, PNorm = 70.5515, GNorm = 0.7661, lr_0 = 5.5389e-04
Loss = 1.3628e-01, PNorm = 70.5676, GNorm = 0.9642, lr_0 = 5.5351e-04
Loss = 1.3057e-01, PNorm = 70.5841, GNorm = 0.7150, lr_0 = 5.5313e-04
Loss = 1.2908e-01, PNorm = 70.5978, GNorm = 0.6571, lr_0 = 5.5275e-04
Loss = 1.3609e-01, PNorm = 70.6142, GNorm = 0.7555, lr_0 = 5.5237e-04
Loss = 1.2995e-01, PNorm = 70.6290, GNorm = 0.9165, lr_0 = 5.5199e-04
Loss = 1.4115e-01, PNorm = 70.6481, GNorm = 0.7458, lr_0 = 5.5162e-04
Loss = 1.4093e-01, PNorm = 70.6644, GNorm = 0.5964, lr_0 = 5.5124e-04
Loss = 1.3007e-01, PNorm = 70.6836, GNorm = 0.6864, lr_0 = 5.5086e-04
Loss = 1.3620e-01, PNorm = 70.6937, GNorm = 0.8342, lr_0 = 5.5048e-04
Loss = 1.3902e-01, PNorm = 70.7062, GNorm = 0.5631, lr_0 = 5.5011e-04
Loss = 1.5775e-01, PNorm = 70.7233, GNorm = 1.1824, lr_0 = 5.4973e-04
Loss = 1.2999e-01, PNorm = 70.7429, GNorm = 0.5406, lr_0 = 5.4935e-04
Loss = 1.4258e-01, PNorm = 70.7583, GNorm = 0.8639, lr_0 = 5.4898e-04
Loss = 1.3563e-01, PNorm = 70.7764, GNorm = 0.6571, lr_0 = 5.4860e-04
Loss = 1.2982e-01, PNorm = 70.7923, GNorm = 0.4726, lr_0 = 5.4822e-04
Loss = 1.4879e-01, PNorm = 70.8065, GNorm = 0.9333, lr_0 = 5.4785e-04
Loss = 1.4102e-01, PNorm = 70.8285, GNorm = 0.6473, lr_0 = 5.4747e-04
Loss = 1.4718e-01, PNorm = 70.8420, GNorm = 0.5085, lr_0 = 5.4710e-04
Loss = 1.5527e-01, PNorm = 70.8582, GNorm = 1.9836, lr_0 = 5.4672e-04
Loss = 1.5110e-01, PNorm = 70.8804, GNorm = 0.6542, lr_0 = 5.4635e-04
Loss = 1.5156e-01, PNorm = 70.8976, GNorm = 0.7152, lr_0 = 5.4597e-04
Loss = 1.4347e-01, PNorm = 70.9124, GNorm = 0.9054, lr_0 = 5.4560e-04
Loss = 1.3580e-01, PNorm = 70.9303, GNorm = 0.7083, lr_0 = 5.4523e-04
Loss = 1.4359e-01, PNorm = 70.9471, GNorm = 0.8082, lr_0 = 5.4485e-04
Loss = 1.3546e-01, PNorm = 70.9596, GNorm = 0.5372, lr_0 = 5.4448e-04
Loss = 1.2335e-01, PNorm = 70.9738, GNorm = 0.5524, lr_0 = 5.4411e-04
Loss = 1.4293e-01, PNorm = 70.9881, GNorm = 0.6669, lr_0 = 5.4373e-04
Loss = 1.4786e-01, PNorm = 71.0055, GNorm = 0.5206, lr_0 = 5.4336e-04
Loss = 1.4281e-01, PNorm = 71.0225, GNorm = 0.6347, lr_0 = 5.4299e-04
Loss = 1.3977e-01, PNorm = 71.0403, GNorm = 0.6134, lr_0 = 5.4262e-04
Loss = 1.5978e-01, PNorm = 71.0609, GNorm = 0.8009, lr_0 = 5.4225e-04
Loss = 1.2281e-01, PNorm = 71.0822, GNorm = 0.7100, lr_0 = 5.4187e-04
Loss = 1.2144e-01, PNorm = 71.0993, GNorm = 0.5636, lr_0 = 5.4150e-04
Loss = 1.9254e-01, PNorm = 71.1190, GNorm = 0.8790, lr_0 = 5.4113e-04
Loss = 1.5602e-01, PNorm = 71.1447, GNorm = 0.9725, lr_0 = 5.4076e-04
Loss = 1.3792e-01, PNorm = 71.1653, GNorm = 0.4834, lr_0 = 5.4039e-04
Loss = 1.6399e-01, PNorm = 71.1825, GNorm = 1.1784, lr_0 = 5.4002e-04
Loss = 1.4308e-01, PNorm = 71.1937, GNorm = 1.1929, lr_0 = 5.3965e-04
Loss = 1.5516e-01, PNorm = 71.2058, GNorm = 0.6982, lr_0 = 5.3928e-04
Loss = 1.5006e-01, PNorm = 71.2238, GNorm = 0.9029, lr_0 = 5.3891e-04
Loss = 1.4716e-01, PNorm = 71.2406, GNorm = 0.6390, lr_0 = 5.3854e-04
Loss = 1.2709e-01, PNorm = 71.2612, GNorm = 0.6703, lr_0 = 5.3817e-04
Loss = 1.2808e-01, PNorm = 71.2799, GNorm = 0.8204, lr_0 = 5.3781e-04
Loss = 1.6492e-01, PNorm = 71.3018, GNorm = 0.5788, lr_0 = 5.3744e-04
Loss = 1.5730e-01, PNorm = 71.3268, GNorm = 0.7079, lr_0 = 5.3707e-04
Loss = 1.3782e-01, PNorm = 71.3548, GNorm = 0.6698, lr_0 = 5.3670e-04
Loss = 1.3801e-01, PNorm = 71.3730, GNorm = 0.9942, lr_0 = 5.3633e-04
Loss = 1.5198e-01, PNorm = 71.3906, GNorm = 0.6758, lr_0 = 5.3597e-04
Loss = 1.5049e-01, PNorm = 71.3986, GNorm = 1.1079, lr_0 = 5.3560e-04
Loss = 1.3714e-01, PNorm = 71.4143, GNorm = 0.5007, lr_0 = 5.3523e-04
Loss = 1.6008e-01, PNorm = 71.4275, GNorm = 0.8882, lr_0 = 5.3486e-04
Loss = 1.4462e-01, PNorm = 71.4449, GNorm = 0.6826, lr_0 = 5.3450e-04
Loss = 1.5936e-01, PNorm = 71.4626, GNorm = 0.9435, lr_0 = 5.3413e-04
Loss = 1.3678e-01, PNorm = 71.4792, GNorm = 0.8331, lr_0 = 5.3377e-04
Loss = 1.4856e-01, PNorm = 71.5038, GNorm = 0.5852, lr_0 = 5.3340e-04
Loss = 1.5673e-01, PNorm = 71.5224, GNorm = 0.8152, lr_0 = 5.3304e-04
Loss = 1.4543e-01, PNorm = 71.5425, GNorm = 0.5564, lr_0 = 5.3267e-04
Loss = 1.4082e-01, PNorm = 71.5617, GNorm = 0.5700, lr_0 = 5.3231e-04
Loss = 1.3759e-01, PNorm = 71.5770, GNorm = 0.7741, lr_0 = 5.3194e-04
Loss = 1.5790e-01, PNorm = 71.5922, GNorm = 0.8156, lr_0 = 5.3158e-04
Loss = 1.6327e-01, PNorm = 71.6106, GNorm = 1.0791, lr_0 = 5.3121e-04
Loss = 1.5976e-01, PNorm = 71.6326, GNorm = 1.5378, lr_0 = 5.3085e-04
Loss = 1.5733e-01, PNorm = 71.6539, GNorm = 1.0354, lr_0 = 5.3048e-04
Loss = 1.4407e-01, PNorm = 71.6743, GNorm = 0.5716, lr_0 = 5.3012e-04
Loss = 1.4352e-01, PNorm = 71.6892, GNorm = 0.6465, lr_0 = 5.2976e-04
Loss = 1.1472e-01, PNorm = 71.7020, GNorm = 0.6518, lr_0 = 5.2939e-04
Loss = 1.5271e-01, PNorm = 71.7158, GNorm = 0.9599, lr_0 = 5.2903e-04
Loss = 1.4685e-01, PNorm = 71.7302, GNorm = 1.1370, lr_0 = 5.2867e-04
Loss = 1.3504e-01, PNorm = 71.7487, GNorm = 0.5474, lr_0 = 5.2831e-04
Loss = 1.3549e-01, PNorm = 71.7662, GNorm = 0.6545, lr_0 = 5.2795e-04
Loss = 1.4210e-01, PNorm = 71.7779, GNorm = 0.8157, lr_0 = 5.2758e-04
Loss = 1.2613e-01, PNorm = 71.7896, GNorm = 0.7368, lr_0 = 5.2722e-04
Loss = 1.3457e-01, PNorm = 71.8017, GNorm = 1.1442, lr_0 = 5.2686e-04
Loss = 1.3225e-01, PNorm = 71.8140, GNorm = 0.8126, lr_0 = 5.2650e-04
Loss = 1.5252e-01, PNorm = 71.8264, GNorm = 0.9349, lr_0 = 5.2614e-04
Loss = 1.5897e-01, PNorm = 71.8379, GNorm = 1.1281, lr_0 = 5.2578e-04
Loss = 1.2296e-01, PNorm = 71.8485, GNorm = 1.0148, lr_0 = 5.2542e-04
Loss = 1.4104e-01, PNorm = 71.8643, GNorm = 0.5666, lr_0 = 5.2506e-04
Loss = 1.3568e-01, PNorm = 71.8776, GNorm = 1.0368, lr_0 = 5.2470e-04
Loss = 1.5491e-01, PNorm = 71.8894, GNorm = 0.9935, lr_0 = 5.2434e-04
Loss = 1.3015e-01, PNorm = 71.8945, GNorm = 0.5382, lr_0 = 5.2398e-04
Loss = 1.5542e-01, PNorm = 71.9041, GNorm = 0.6467, lr_0 = 5.2362e-04
Loss = 1.4561e-01, PNorm = 71.9164, GNorm = 0.9127, lr_0 = 5.2326e-04
Loss = 1.3706e-01, PNorm = 71.9302, GNorm = 1.0115, lr_0 = 5.2290e-04
Loss = 1.5661e-01, PNorm = 71.9493, GNorm = 0.9950, lr_0 = 5.2255e-04
Loss = 1.4713e-01, PNorm = 71.9745, GNorm = 0.5308, lr_0 = 5.2219e-04
Loss = 1.5412e-01, PNorm = 71.9946, GNorm = 0.7230, lr_0 = 5.2183e-04
Loss = 1.4639e-01, PNorm = 72.0129, GNorm = 0.5791, lr_0 = 5.2147e-04
Loss = 1.3305e-01, PNorm = 72.0251, GNorm = 0.4943, lr_0 = 5.2112e-04
Loss = 1.6128e-01, PNorm = 72.0408, GNorm = 0.8459, lr_0 = 5.2076e-04
Loss = 1.7239e-01, PNorm = 72.0504, GNorm = 1.3151, lr_0 = 5.2040e-04
Loss = 1.3783e-01, PNorm = 72.0656, GNorm = 1.1522, lr_0 = 5.2005e-04
Loss = 1.6225e-01, PNorm = 72.0721, GNorm = 1.2682, lr_0 = 5.1969e-04
Loss = 1.3602e-01, PNorm = 72.0892, GNorm = 0.5841, lr_0 = 5.1933e-04
Loss = 1.3885e-01, PNorm = 72.1058, GNorm = 0.7742, lr_0 = 5.1898e-04
Loss = 1.3883e-01, PNorm = 72.1249, GNorm = 0.7934, lr_0 = 5.1862e-04
Loss = 1.3365e-01, PNorm = 72.1419, GNorm = 0.8700, lr_0 = 5.1827e-04
Loss = 1.4768e-01, PNorm = 72.1573, GNorm = 0.9766, lr_0 = 5.1791e-04
Validation mae = 0.247003
Epoch 10
Loss = 1.3826e-01, PNorm = 72.1775, GNorm = 0.7008, lr_0 = 5.1756e-04
Loss = 1.3340e-01, PNorm = 72.1869, GNorm = 1.0179, lr_0 = 5.1720e-04
Loss = 1.0961e-01, PNorm = 72.1957, GNorm = 0.8454, lr_0 = 5.1685e-04
Loss = 1.4442e-01, PNorm = 72.2140, GNorm = 0.9661, lr_0 = 5.1649e-04
Loss = 1.4691e-01, PNorm = 72.2326, GNorm = 0.8095, lr_0 = 5.1614e-04
Loss = 1.2420e-01, PNorm = 72.2524, GNorm = 0.5626, lr_0 = 5.1579e-04
Loss = 1.3219e-01, PNorm = 72.2717, GNorm = 0.7842, lr_0 = 5.1543e-04
Loss = 1.2788e-01, PNorm = 72.2883, GNorm = 0.5354, lr_0 = 5.1508e-04
Loss = 1.3170e-01, PNorm = 72.3003, GNorm = 0.7554, lr_0 = 5.1473e-04
Loss = 1.3137e-01, PNorm = 72.3185, GNorm = 0.5904, lr_0 = 5.1437e-04
Loss = 1.1576e-01, PNorm = 72.3305, GNorm = 0.5263, lr_0 = 5.1402e-04
Loss = 1.2793e-01, PNorm = 72.3404, GNorm = 0.5582, lr_0 = 5.1367e-04
Loss = 1.3357e-01, PNorm = 72.3556, GNorm = 0.6196, lr_0 = 5.1332e-04
Loss = 1.2339e-01, PNorm = 72.3701, GNorm = 0.5795, lr_0 = 5.1297e-04
Loss = 1.4864e-01, PNorm = 72.3812, GNorm = 0.8433, lr_0 = 5.1262e-04
Loss = 1.3933e-01, PNorm = 72.3934, GNorm = 0.7649, lr_0 = 5.1226e-04
Loss = 1.5048e-01, PNorm = 72.4103, GNorm = 1.0245, lr_0 = 5.1191e-04
Loss = 1.2216e-01, PNorm = 72.4251, GNorm = 0.7130, lr_0 = 5.1156e-04
Loss = 1.5023e-01, PNorm = 72.4405, GNorm = 0.5651, lr_0 = 5.1121e-04
Loss = 1.3943e-01, PNorm = 72.4554, GNorm = 0.5732, lr_0 = 5.1086e-04
Loss = 1.3733e-01, PNorm = 72.4711, GNorm = 1.0607, lr_0 = 5.1051e-04
Loss = 1.3826e-01, PNorm = 72.4905, GNorm = 1.2047, lr_0 = 5.1016e-04
Loss = 1.1536e-01, PNorm = 72.5046, GNorm = 0.6047, lr_0 = 5.0981e-04
Loss = 1.3834e-01, PNorm = 72.5239, GNorm = 0.6414, lr_0 = 5.0946e-04
Loss = 1.3743e-01, PNorm = 72.5436, GNorm = 0.5960, lr_0 = 5.0911e-04
Loss = 1.5334e-01, PNorm = 72.5643, GNorm = 0.8557, lr_0 = 5.0877e-04
Loss = 1.2507e-01, PNorm = 72.5721, GNorm = 0.5686, lr_0 = 5.0842e-04
Loss = 1.3156e-01, PNorm = 72.5808, GNorm = 0.7171, lr_0 = 5.0807e-04
Loss = 1.3771e-01, PNorm = 72.5962, GNorm = 0.8089, lr_0 = 5.0772e-04
Loss = 1.3539e-01, PNorm = 72.6145, GNorm = 0.6081, lr_0 = 5.0737e-04
Loss = 1.2849e-01, PNorm = 72.6270, GNorm = 0.6608, lr_0 = 5.0703e-04
Loss = 1.2407e-01, PNorm = 72.6370, GNorm = 0.4469, lr_0 = 5.0668e-04
Loss = 1.2580e-01, PNorm = 72.6525, GNorm = 0.8719, lr_0 = 5.0633e-04
Loss = 1.3266e-01, PNorm = 72.6677, GNorm = 0.7024, lr_0 = 5.0598e-04
Loss = 1.5656e-01, PNorm = 72.6856, GNorm = 0.8190, lr_0 = 5.0564e-04
Loss = 1.5322e-01, PNorm = 72.7050, GNorm = 1.3367, lr_0 = 5.0529e-04
Loss = 1.3787e-01, PNorm = 72.7229, GNorm = 0.7537, lr_0 = 5.0494e-04
Loss = 1.3385e-01, PNorm = 72.7392, GNorm = 0.6759, lr_0 = 5.0460e-04
Loss = 1.5720e-01, PNorm = 72.7592, GNorm = 0.6240, lr_0 = 5.0425e-04
Loss = 1.1565e-01, PNorm = 72.7812, GNorm = 1.1949, lr_0 = 5.0391e-04
Loss = 1.3767e-01, PNorm = 72.7995, GNorm = 1.1410, lr_0 = 5.0356e-04
Loss = 1.4230e-01, PNorm = 72.8147, GNorm = 0.6840, lr_0 = 5.0322e-04
Loss = 1.4245e-01, PNorm = 72.8220, GNorm = 0.6819, lr_0 = 5.0287e-04
Loss = 1.3202e-01, PNorm = 72.8343, GNorm = 0.6552, lr_0 = 5.0253e-04
Loss = 1.3287e-01, PNorm = 72.8472, GNorm = 0.8552, lr_0 = 5.0218e-04
Loss = 1.1220e-01, PNorm = 72.8601, GNorm = 0.5991, lr_0 = 5.0184e-04
Loss = 1.3804e-01, PNorm = 72.8697, GNorm = 0.6966, lr_0 = 5.0150e-04
Loss = 1.4963e-01, PNorm = 72.8820, GNorm = 0.7323, lr_0 = 5.0115e-04
Loss = 1.2889e-01, PNorm = 72.8979, GNorm = 1.2512, lr_0 = 5.0081e-04
Loss = 1.3361e-01, PNorm = 72.9118, GNorm = 0.9498, lr_0 = 5.0047e-04
Loss = 1.3237e-01, PNorm = 72.9293, GNorm = 0.6187, lr_0 = 5.0012e-04
Loss = 1.3377e-01, PNorm = 72.9459, GNorm = 0.6266, lr_0 = 4.9978e-04
Loss = 1.2227e-01, PNorm = 72.9638, GNorm = 0.8779, lr_0 = 4.9944e-04
Loss = 1.3264e-01, PNorm = 72.9794, GNorm = 1.1401, lr_0 = 4.9910e-04
Loss = 1.6294e-01, PNorm = 72.9919, GNorm = 0.7191, lr_0 = 4.9875e-04
Loss = 1.1361e-01, PNorm = 73.0107, GNorm = 0.7842, lr_0 = 4.9841e-04
Loss = 1.2796e-01, PNorm = 73.0197, GNorm = 0.4977, lr_0 = 4.9807e-04
Loss = 1.4422e-01, PNorm = 73.0351, GNorm = 1.1064, lr_0 = 4.9773e-04
Loss = 1.4789e-01, PNorm = 73.0518, GNorm = 1.1888, lr_0 = 4.9739e-04
Loss = 1.1369e-01, PNorm = 73.0724, GNorm = 0.6032, lr_0 = 4.9705e-04
Loss = 1.3056e-01, PNorm = 73.0810, GNorm = 0.7210, lr_0 = 4.9671e-04
Loss = 1.5032e-01, PNorm = 73.0945, GNorm = 0.8851, lr_0 = 4.9637e-04
Loss = 1.2822e-01, PNorm = 73.1046, GNorm = 0.6022, lr_0 = 4.9603e-04
Loss = 1.4491e-01, PNorm = 73.1212, GNorm = 0.5370, lr_0 = 4.9569e-04
Loss = 1.1456e-01, PNorm = 73.1336, GNorm = 1.0160, lr_0 = 4.9535e-04
Loss = 1.3508e-01, PNorm = 73.1546, GNorm = 0.6203, lr_0 = 4.9501e-04
Loss = 1.4539e-01, PNorm = 73.1715, GNorm = 0.8512, lr_0 = 4.9467e-04
Loss = 1.2162e-01, PNorm = 73.1888, GNorm = 0.7349, lr_0 = 4.9433e-04
Loss = 1.3096e-01, PNorm = 73.1999, GNorm = 0.5629, lr_0 = 4.9399e-04
Loss = 1.4197e-01, PNorm = 73.2058, GNorm = 0.7858, lr_0 = 4.9365e-04
Loss = 1.1744e-01, PNorm = 73.2222, GNorm = 0.5384, lr_0 = 4.9332e-04
Loss = 1.3288e-01, PNorm = 73.2394, GNorm = 0.6466, lr_0 = 4.9298e-04
Loss = 1.2761e-01, PNorm = 73.2530, GNorm = 0.9218, lr_0 = 4.9264e-04
Loss = 1.3514e-01, PNorm = 73.2696, GNorm = 0.6736, lr_0 = 4.9230e-04
Loss = 1.4486e-01, PNorm = 73.2858, GNorm = 0.6754, lr_0 = 4.9197e-04
Loss = 1.6678e-01, PNorm = 73.3050, GNorm = 0.7003, lr_0 = 4.9163e-04
Loss = 1.3349e-01, PNorm = 73.3280, GNorm = 0.7024, lr_0 = 4.9129e-04
Loss = 1.3908e-01, PNorm = 73.3436, GNorm = 0.6195, lr_0 = 4.9095e-04
Loss = 1.5420e-01, PNorm = 73.3539, GNorm = 0.6319, lr_0 = 4.9062e-04
Loss = 1.3947e-01, PNorm = 73.3647, GNorm = 1.1331, lr_0 = 4.9028e-04
Loss = 1.2952e-01, PNorm = 73.3807, GNorm = 0.6046, lr_0 = 4.8995e-04
Loss = 1.4409e-01, PNorm = 73.3986, GNorm = 1.1432, lr_0 = 4.8961e-04
Loss = 1.5111e-01, PNorm = 73.4149, GNorm = 1.2549, lr_0 = 4.8928e-04
Loss = 1.4200e-01, PNorm = 73.4298, GNorm = 0.7305, lr_0 = 4.8894e-04
Loss = 1.3265e-01, PNorm = 73.4458, GNorm = 0.4891, lr_0 = 4.8861e-04
Loss = 1.4240e-01, PNorm = 73.4552, GNorm = 1.1893, lr_0 = 4.8827e-04
Loss = 1.3611e-01, PNorm = 73.4677, GNorm = 1.1077, lr_0 = 4.8794e-04
Loss = 1.3287e-01, PNorm = 73.4791, GNorm = 0.7110, lr_0 = 4.8760e-04
Loss = 1.4311e-01, PNorm = 73.4874, GNorm = 1.5697, lr_0 = 4.8727e-04
Loss = 1.2629e-01, PNorm = 73.4996, GNorm = 0.5175, lr_0 = 4.8693e-04
Loss = 1.3783e-01, PNorm = 73.5101, GNorm = 0.6080, lr_0 = 4.8660e-04
Loss = 1.2467e-01, PNorm = 73.5213, GNorm = 0.5596, lr_0 = 4.8627e-04
Loss = 1.2857e-01, PNorm = 73.5359, GNorm = 1.3645, lr_0 = 4.8593e-04
Loss = 1.3608e-01, PNorm = 73.5449, GNorm = 0.7467, lr_0 = 4.8560e-04
Loss = 1.4291e-01, PNorm = 73.5652, GNorm = 0.8890, lr_0 = 4.8527e-04
Loss = 1.4421e-01, PNorm = 73.5757, GNorm = 0.5782, lr_0 = 4.8494e-04
Loss = 1.4805e-01, PNorm = 73.5889, GNorm = 0.8548, lr_0 = 4.8460e-04
Loss = 1.4017e-01, PNorm = 73.6083, GNorm = 0.6097, lr_0 = 4.8427e-04
Loss = 1.3649e-01, PNorm = 73.6270, GNorm = 0.6713, lr_0 = 4.8394e-04
Loss = 1.4050e-01, PNorm = 73.6367, GNorm = 0.9511, lr_0 = 4.8361e-04
Loss = 1.4639e-01, PNorm = 73.6576, GNorm = 0.6657, lr_0 = 4.8328e-04
Loss = 1.3269e-01, PNorm = 73.6705, GNorm = 0.8320, lr_0 = 4.8295e-04
Loss = 1.3089e-01, PNorm = 73.6797, GNorm = 0.8043, lr_0 = 4.8262e-04
Loss = 1.3415e-01, PNorm = 73.6939, GNorm = 0.6514, lr_0 = 4.8228e-04
Loss = 1.2935e-01, PNorm = 73.7041, GNorm = 0.5734, lr_0 = 4.8195e-04
Loss = 1.5221e-01, PNorm = 73.7224, GNorm = 0.7243, lr_0 = 4.8162e-04
Loss = 1.4690e-01, PNorm = 73.7433, GNorm = 1.3304, lr_0 = 4.8129e-04
Loss = 1.4136e-01, PNorm = 73.7574, GNorm = 0.6225, lr_0 = 4.8096e-04
Loss = 1.3269e-01, PNorm = 73.7735, GNorm = 1.2487, lr_0 = 4.8064e-04
Loss = 1.5410e-01, PNorm = 73.7931, GNorm = 0.8766, lr_0 = 4.8031e-04
Loss = 1.3806e-01, PNorm = 73.8108, GNorm = 0.7135, lr_0 = 4.7998e-04
Loss = 1.3404e-01, PNorm = 73.8258, GNorm = 0.6724, lr_0 = 4.7965e-04
Loss = 1.2956e-01, PNorm = 73.8411, GNorm = 0.6597, lr_0 = 4.7932e-04
Loss = 1.2632e-01, PNorm = 73.8500, GNorm = 0.6414, lr_0 = 4.7899e-04
Loss = 1.4738e-01, PNorm = 73.8607, GNorm = 1.0972, lr_0 = 4.7866e-04
Loss = 1.3006e-01, PNorm = 73.8706, GNorm = 0.6796, lr_0 = 4.7833e-04
Loss = 1.2207e-01, PNorm = 73.8837, GNorm = 0.7740, lr_0 = 4.7801e-04
Loss = 1.3719e-01, PNorm = 73.8945, GNorm = 0.6895, lr_0 = 4.7768e-04
Loss = 1.3796e-01, PNorm = 73.9074, GNorm = 0.8876, lr_0 = 4.7735e-04
Loss = 1.3437e-01, PNorm = 73.9225, GNorm = 0.6876, lr_0 = 4.7703e-04
Validation mae = 0.237318
Epoch 11
Loss = 1.0642e-01, PNorm = 73.9325, GNorm = 0.8570, lr_0 = 4.7670e-04
Loss = 1.2437e-01, PNorm = 73.9448, GNorm = 0.7837, lr_0 = 4.7637e-04
Loss = 1.0727e-01, PNorm = 73.9582, GNorm = 0.8177, lr_0 = 4.7605e-04
Loss = 1.3719e-01, PNorm = 73.9738, GNorm = 0.6188, lr_0 = 4.7572e-04
Loss = 1.2696e-01, PNorm = 73.9894, GNorm = 1.0650, lr_0 = 4.7539e-04
Loss = 1.2255e-01, PNorm = 74.0084, GNorm = 1.0065, lr_0 = 4.7507e-04
Loss = 1.2227e-01, PNorm = 74.0261, GNorm = 0.9552, lr_0 = 4.7474e-04
Loss = 1.3166e-01, PNorm = 74.0379, GNorm = 0.8543, lr_0 = 4.7442e-04
Loss = 1.3875e-01, PNorm = 74.0517, GNorm = 0.9444, lr_0 = 4.7409e-04
Loss = 1.2873e-01, PNorm = 74.0659, GNorm = 0.7663, lr_0 = 4.7377e-04
Loss = 1.1972e-01, PNorm = 74.0791, GNorm = 0.8654, lr_0 = 4.7344e-04
Loss = 1.1572e-01, PNorm = 74.0894, GNorm = 0.4942, lr_0 = 4.7312e-04
Loss = 1.1543e-01, PNorm = 74.1040, GNorm = 1.0516, lr_0 = 4.7279e-04
Loss = 1.2023e-01, PNorm = 74.1139, GNorm = 0.7048, lr_0 = 4.7247e-04
Loss = 1.3022e-01, PNorm = 74.1224, GNorm = 0.6166, lr_0 = 4.7215e-04
Loss = 1.3537e-01, PNorm = 74.1350, GNorm = 0.8019, lr_0 = 4.7182e-04
Loss = 1.2616e-01, PNorm = 74.1515, GNorm = 0.6064, lr_0 = 4.7150e-04
Loss = 1.4165e-01, PNorm = 74.1686, GNorm = 0.6275, lr_0 = 4.7118e-04
Loss = 1.1528e-01, PNorm = 74.1834, GNorm = 0.9284, lr_0 = 4.7085e-04
Loss = 1.2419e-01, PNorm = 74.1995, GNorm = 0.6594, lr_0 = 4.7053e-04
Loss = 1.1326e-01, PNorm = 74.2131, GNorm = 0.5708, lr_0 = 4.7021e-04
Loss = 1.1985e-01, PNorm = 74.2274, GNorm = 0.7387, lr_0 = 4.6989e-04
Loss = 1.1055e-01, PNorm = 74.2404, GNorm = 1.0079, lr_0 = 4.6957e-04
Loss = 1.1759e-01, PNorm = 74.2525, GNorm = 0.7228, lr_0 = 4.6924e-04
Loss = 1.3890e-01, PNorm = 74.2653, GNorm = 0.9108, lr_0 = 4.6892e-04
Loss = 1.4611e-01, PNorm = 74.2800, GNorm = 0.7841, lr_0 = 4.6860e-04
Loss = 1.3503e-01, PNorm = 74.2909, GNorm = 0.9561, lr_0 = 4.6828e-04
Loss = 1.1819e-01, PNorm = 74.3067, GNorm = 0.6045, lr_0 = 4.6796e-04
Loss = 1.0523e-01, PNorm = 74.3186, GNorm = 0.4891, lr_0 = 4.6764e-04
Loss = 1.2461e-01, PNorm = 74.3285, GNorm = 0.8160, lr_0 = 4.6732e-04
Loss = 1.2142e-01, PNorm = 74.3442, GNorm = 0.5266, lr_0 = 4.6700e-04
Loss = 1.2245e-01, PNorm = 74.3518, GNorm = 0.4939, lr_0 = 4.6668e-04
Loss = 1.1936e-01, PNorm = 74.3586, GNorm = 0.6971, lr_0 = 4.6636e-04
Loss = 1.2754e-01, PNorm = 74.3736, GNorm = 0.6542, lr_0 = 4.6604e-04
Loss = 1.0811e-01, PNorm = 74.3900, GNorm = 0.4996, lr_0 = 4.6572e-04
Loss = 1.3515e-01, PNorm = 74.4002, GNorm = 1.2814, lr_0 = 4.6540e-04
Loss = 1.1668e-01, PNorm = 74.4172, GNorm = 0.5984, lr_0 = 4.6508e-04
Loss = 1.3825e-01, PNorm = 74.4290, GNorm = 0.9389, lr_0 = 4.6476e-04
Loss = 1.2750e-01, PNorm = 74.4453, GNorm = 0.7517, lr_0 = 4.6445e-04
Loss = 1.4731e-01, PNorm = 74.4551, GNorm = 0.7921, lr_0 = 4.6413e-04
Loss = 1.4587e-01, PNorm = 74.4683, GNorm = 0.8243, lr_0 = 4.6381e-04
Loss = 1.2626e-01, PNorm = 74.4832, GNorm = 0.8661, lr_0 = 4.6349e-04
Loss = 1.2425e-01, PNorm = 74.4944, GNorm = 0.7016, lr_0 = 4.6317e-04
Loss = 1.3063e-01, PNorm = 74.5103, GNorm = 1.0175, lr_0 = 4.6286e-04
Loss = 1.2661e-01, PNorm = 74.5189, GNorm = 0.5474, lr_0 = 4.6254e-04
Loss = 1.2746e-01, PNorm = 74.5333, GNorm = 0.5451, lr_0 = 4.6222e-04
Loss = 1.4244e-01, PNorm = 74.5484, GNorm = 0.7490, lr_0 = 4.6191e-04
Loss = 1.1479e-01, PNorm = 74.5672, GNorm = 0.6261, lr_0 = 4.6159e-04
Loss = 1.3023e-01, PNorm = 74.5797, GNorm = 0.6486, lr_0 = 4.6127e-04
Loss = 1.3047e-01, PNorm = 74.5950, GNorm = 1.0241, lr_0 = 4.6096e-04
Loss = 1.2972e-01, PNorm = 74.6051, GNorm = 0.7536, lr_0 = 4.6064e-04
Loss = 1.2524e-01, PNorm = 74.6221, GNorm = 0.8675, lr_0 = 4.6033e-04
Loss = 1.2136e-01, PNorm = 74.6363, GNorm = 0.6575, lr_0 = 4.6001e-04
Loss = 1.3395e-01, PNorm = 74.6525, GNorm = 0.6613, lr_0 = 4.5970e-04
Loss = 1.2638e-01, PNorm = 74.6666, GNorm = 0.6758, lr_0 = 4.5938e-04
Loss = 1.3646e-01, PNorm = 74.6770, GNorm = 0.6476, lr_0 = 4.5907e-04
Loss = 1.2961e-01, PNorm = 74.6864, GNorm = 0.9784, lr_0 = 4.5875e-04
Loss = 1.4420e-01, PNorm = 74.6953, GNorm = 0.8023, lr_0 = 4.5844e-04
Loss = 1.2044e-01, PNorm = 74.7091, GNorm = 0.6960, lr_0 = 4.5812e-04
Loss = 1.4228e-01, PNorm = 74.7195, GNorm = 0.8106, lr_0 = 4.5781e-04
Loss = 1.2867e-01, PNorm = 74.7319, GNorm = 0.6503, lr_0 = 4.5750e-04
Loss = 1.2632e-01, PNorm = 74.7435, GNorm = 0.5233, lr_0 = 4.5718e-04
Loss = 1.1575e-01, PNorm = 74.7534, GNorm = 0.8411, lr_0 = 4.5687e-04
Loss = 1.2408e-01, PNorm = 74.7636, GNorm = 0.6006, lr_0 = 4.5656e-04
Loss = 1.2797e-01, PNorm = 74.7760, GNorm = 0.6868, lr_0 = 4.5624e-04
Loss = 1.2678e-01, PNorm = 74.7880, GNorm = 1.0724, lr_0 = 4.5593e-04
Loss = 1.3246e-01, PNorm = 74.8066, GNorm = 0.8316, lr_0 = 4.5562e-04
Loss = 1.2339e-01, PNorm = 74.8157, GNorm = 0.5777, lr_0 = 4.5531e-04
Loss = 1.2191e-01, PNorm = 74.8275, GNorm = 0.9151, lr_0 = 4.5499e-04
Loss = 1.2798e-01, PNorm = 74.8402, GNorm = 0.6858, lr_0 = 4.5468e-04
Loss = 1.1744e-01, PNorm = 74.8552, GNorm = 0.6980, lr_0 = 4.5437e-04
Loss = 1.2353e-01, PNorm = 74.8676, GNorm = 0.6936, lr_0 = 4.5406e-04
Loss = 1.3704e-01, PNorm = 74.8772, GNorm = 0.7578, lr_0 = 4.5375e-04
Loss = 1.2618e-01, PNorm = 74.8904, GNorm = 0.7455, lr_0 = 4.5344e-04
Loss = 1.3256e-01, PNorm = 74.8939, GNorm = 0.4966, lr_0 = 4.5313e-04
Loss = 1.2867e-01, PNorm = 74.9032, GNorm = 0.5747, lr_0 = 4.5282e-04
Loss = 1.3738e-01, PNorm = 74.9145, GNorm = 0.6522, lr_0 = 4.5251e-04
Loss = 1.3064e-01, PNorm = 74.9280, GNorm = 0.7370, lr_0 = 4.5220e-04
Loss = 1.3953e-01, PNorm = 74.9389, GNorm = 0.9971, lr_0 = 4.5189e-04
Loss = 1.3114e-01, PNorm = 74.9512, GNorm = 1.2455, lr_0 = 4.5158e-04
Loss = 1.4346e-01, PNorm = 74.9635, GNorm = 0.5444, lr_0 = 4.5127e-04
Loss = 1.4997e-01, PNorm = 74.9792, GNorm = 0.9920, lr_0 = 4.5096e-04
Loss = 1.2543e-01, PNorm = 74.9979, GNorm = 0.4161, lr_0 = 4.5065e-04
Loss = 1.2290e-01, PNorm = 75.0105, GNorm = 0.6024, lr_0 = 4.5034e-04
Loss = 1.3638e-01, PNorm = 75.0192, GNorm = 0.9305, lr_0 = 4.5003e-04
Loss = 1.3190e-01, PNorm = 75.0265, GNorm = 0.6676, lr_0 = 4.4972e-04
Loss = 1.2548e-01, PNorm = 75.0368, GNorm = 0.8993, lr_0 = 4.4942e-04
Loss = 1.2222e-01, PNorm = 75.0485, GNorm = 0.6897, lr_0 = 4.4911e-04
Loss = 1.5066e-01, PNorm = 75.0631, GNorm = 0.9318, lr_0 = 4.4880e-04
Loss = 1.2669e-01, PNorm = 75.0760, GNorm = 0.8345, lr_0 = 4.4849e-04
Loss = 1.4352e-01, PNorm = 75.0891, GNorm = 0.9089, lr_0 = 4.4819e-04
Loss = 1.3980e-01, PNorm = 75.1026, GNorm = 0.5072, lr_0 = 4.4788e-04
Loss = 1.3282e-01, PNorm = 75.1182, GNorm = 0.9825, lr_0 = 4.4757e-04
Loss = 1.4825e-01, PNorm = 75.1280, GNorm = 0.7211, lr_0 = 4.4727e-04
Loss = 1.1893e-01, PNorm = 75.1439, GNorm = 0.6512, lr_0 = 4.4696e-04
Loss = 1.1406e-01, PNorm = 75.1558, GNorm = 0.7326, lr_0 = 4.4665e-04
Loss = 1.1690e-01, PNorm = 75.1687, GNorm = 0.5331, lr_0 = 4.4635e-04
Loss = 1.3686e-01, PNorm = 75.1820, GNorm = 0.6844, lr_0 = 4.4604e-04
Loss = 1.2631e-01, PNorm = 75.1997, GNorm = 0.7762, lr_0 = 4.4574e-04
Loss = 1.2996e-01, PNorm = 75.2074, GNorm = 0.7573, lr_0 = 4.4543e-04
Loss = 1.3954e-01, PNorm = 75.2196, GNorm = 0.7045, lr_0 = 4.4513e-04
Loss = 1.1802e-01, PNorm = 75.2293, GNorm = 0.4812, lr_0 = 4.4482e-04
Loss = 1.2846e-01, PNorm = 75.2406, GNorm = 0.6389, lr_0 = 4.4452e-04
Loss = 1.2734e-01, PNorm = 75.2563, GNorm = 0.7412, lr_0 = 4.4421e-04
Loss = 1.3634e-01, PNorm = 75.2689, GNorm = 0.5842, lr_0 = 4.4391e-04
Loss = 1.2330e-01, PNorm = 75.2825, GNorm = 0.7285, lr_0 = 4.4360e-04
Loss = 1.3345e-01, PNorm = 75.2941, GNorm = 0.6595, lr_0 = 4.4330e-04
Loss = 1.4718e-01, PNorm = 75.3025, GNorm = 0.5895, lr_0 = 4.4299e-04
Loss = 1.2643e-01, PNorm = 75.3112, GNorm = 0.8360, lr_0 = 4.4269e-04
Loss = 1.2546e-01, PNorm = 75.3246, GNorm = 0.7063, lr_0 = 4.4239e-04
Loss = 1.4248e-01, PNorm = 75.3408, GNorm = 0.6438, lr_0 = 4.4209e-04
Loss = 1.3317e-01, PNorm = 75.3554, GNorm = 0.6433, lr_0 = 4.4178e-04
Loss = 1.2616e-01, PNorm = 75.3648, GNorm = 1.0171, lr_0 = 4.4148e-04
Loss = 1.2089e-01, PNorm = 75.3765, GNorm = 1.2782, lr_0 = 4.4118e-04
Loss = 1.3654e-01, PNorm = 75.3911, GNorm = 0.8730, lr_0 = 4.4088e-04
Loss = 1.3204e-01, PNorm = 75.4040, GNorm = 0.5446, lr_0 = 4.4057e-04
Loss = 1.2812e-01, PNorm = 75.4153, GNorm = 0.5366, lr_0 = 4.4027e-04
Loss = 1.2599e-01, PNorm = 75.4267, GNorm = 0.6487, lr_0 = 4.3997e-04
Loss = 1.2831e-01, PNorm = 75.4439, GNorm = 0.8331, lr_0 = 4.3967e-04
Loss = 1.3912e-01, PNorm = 75.4584, GNorm = 0.6850, lr_0 = 4.3937e-04
Validation mae = 0.240132
Epoch 12
Loss = 1.3228e-01, PNorm = 75.4700, GNorm = 0.5997, lr_0 = 4.3907e-04
Loss = 1.1522e-01, PNorm = 75.4808, GNorm = 0.8086, lr_0 = 4.3877e-04
Loss = 1.1810e-01, PNorm = 75.4940, GNorm = 0.7701, lr_0 = 4.3846e-04
Loss = 1.1861e-01, PNorm = 75.5053, GNorm = 0.6045, lr_0 = 4.3816e-04
Loss = 1.1959e-01, PNorm = 75.5107, GNorm = 0.6820, lr_0 = 4.3786e-04
Loss = 1.2459e-01, PNorm = 75.5230, GNorm = 1.0883, lr_0 = 4.3756e-04
Loss = 1.3154e-01, PNorm = 75.5365, GNorm = 0.6050, lr_0 = 4.3726e-04
Loss = 1.0967e-01, PNorm = 75.5494, GNorm = 0.5200, lr_0 = 4.3696e-04
Loss = 1.1619e-01, PNorm = 75.5623, GNorm = 0.9071, lr_0 = 4.3667e-04
Loss = 1.3683e-01, PNorm = 75.5720, GNorm = 0.6835, lr_0 = 4.3637e-04
Loss = 1.2501e-01, PNorm = 75.5859, GNorm = 0.6089, lr_0 = 4.3607e-04
Loss = 1.2797e-01, PNorm = 75.6021, GNorm = 0.6121, lr_0 = 4.3577e-04
Loss = 1.1814e-01, PNorm = 75.6167, GNorm = 0.7378, lr_0 = 4.3547e-04
Loss = 1.2897e-01, PNorm = 75.6332, GNorm = 0.7784, lr_0 = 4.3517e-04
Loss = 1.2046e-01, PNorm = 75.6476, GNorm = 0.6163, lr_0 = 4.3487e-04
Loss = 1.4292e-01, PNorm = 75.6558, GNorm = 0.6808, lr_0 = 4.3458e-04
Loss = 1.1823e-01, PNorm = 75.6661, GNorm = 0.7186, lr_0 = 4.3428e-04
Loss = 1.1668e-01, PNorm = 75.6765, GNorm = 0.5058, lr_0 = 4.3398e-04
Loss = 1.5414e-01, PNorm = 75.6935, GNorm = 0.6679, lr_0 = 4.3368e-04
Loss = 1.1438e-01, PNorm = 75.7059, GNorm = 0.5284, lr_0 = 4.3339e-04
Loss = 1.1697e-01, PNorm = 75.7147, GNorm = 0.6403, lr_0 = 4.3309e-04
Loss = 1.2744e-01, PNorm = 75.7339, GNorm = 0.6441, lr_0 = 4.3279e-04
Loss = 1.1677e-01, PNorm = 75.7501, GNorm = 0.7521, lr_0 = 4.3250e-04
Loss = 1.2495e-01, PNorm = 75.7670, GNorm = 1.0413, lr_0 = 4.3220e-04
Loss = 1.2111e-01, PNorm = 75.7782, GNorm = 0.8766, lr_0 = 4.3190e-04
Loss = 1.4127e-01, PNorm = 75.7914, GNorm = 0.9268, lr_0 = 4.3161e-04
Loss = 1.2020e-01, PNorm = 75.8063, GNorm = 0.6222, lr_0 = 4.3131e-04
Loss = 1.0405e-01, PNorm = 75.8186, GNorm = 0.6065, lr_0 = 4.3102e-04
Loss = 1.3284e-01, PNorm = 75.8301, GNorm = 0.7329, lr_0 = 4.3072e-04
Loss = 1.0900e-01, PNorm = 75.8390, GNorm = 0.5996, lr_0 = 4.3043e-04
Loss = 1.2119e-01, PNorm = 75.8492, GNorm = 1.1225, lr_0 = 4.3013e-04
Loss = 1.2477e-01, PNorm = 75.8587, GNorm = 0.6576, lr_0 = 4.2984e-04
Loss = 1.3358e-01, PNorm = 75.8688, GNorm = 0.9493, lr_0 = 4.2954e-04
Loss = 1.2824e-01, PNorm = 75.8796, GNorm = 1.0408, lr_0 = 4.2925e-04
Loss = 1.2762e-01, PNorm = 75.8912, GNorm = 0.5414, lr_0 = 4.2895e-04
Loss = 1.1588e-01, PNorm = 75.8981, GNorm = 0.5173, lr_0 = 4.2866e-04
Loss = 1.2653e-01, PNorm = 75.9074, GNorm = 0.6610, lr_0 = 4.2837e-04
Loss = 1.1785e-01, PNorm = 75.9195, GNorm = 0.8133, lr_0 = 4.2807e-04
Loss = 1.2184e-01, PNorm = 75.9341, GNorm = 0.5163, lr_0 = 4.2778e-04
Loss = 1.1918e-01, PNorm = 75.9488, GNorm = 0.5217, lr_0 = 4.2749e-04
Loss = 1.0531e-01, PNorm = 75.9599, GNorm = 0.6454, lr_0 = 4.2719e-04
Loss = 1.1155e-01, PNorm = 75.9673, GNorm = 0.6968, lr_0 = 4.2690e-04
Loss = 1.4312e-01, PNorm = 75.9770, GNorm = 0.5835, lr_0 = 4.2661e-04
Loss = 1.3714e-01, PNorm = 75.9870, GNorm = 0.4890, lr_0 = 4.2632e-04
Loss = 1.2428e-01, PNorm = 75.9984, GNorm = 0.6534, lr_0 = 4.2602e-04
Loss = 1.2184e-01, PNorm = 76.0113, GNorm = 0.8839, lr_0 = 4.2573e-04
Loss = 1.1065e-01, PNorm = 76.0245, GNorm = 0.6906, lr_0 = 4.2544e-04
Loss = 1.2261e-01, PNorm = 76.0356, GNorm = 0.8331, lr_0 = 4.2515e-04
Loss = 1.2671e-01, PNorm = 76.0507, GNorm = 1.2226, lr_0 = 4.2486e-04
Loss = 1.2482e-01, PNorm = 76.0595, GNorm = 0.5598, lr_0 = 4.2457e-04
Loss = 1.2265e-01, PNorm = 76.0704, GNorm = 0.6941, lr_0 = 4.2428e-04
Loss = 1.3037e-01, PNorm = 76.0799, GNorm = 0.6167, lr_0 = 4.2399e-04
Loss = 1.4648e-01, PNorm = 76.0909, GNorm = 0.8305, lr_0 = 4.2370e-04
Loss = 1.2465e-01, PNorm = 76.1050, GNorm = 0.8628, lr_0 = 4.2340e-04
Loss = 1.2782e-01, PNorm = 76.1196, GNorm = 0.7859, lr_0 = 4.2311e-04
Loss = 1.1978e-01, PNorm = 76.1325, GNorm = 0.7741, lr_0 = 4.2283e-04
Loss = 1.3061e-01, PNorm = 76.1354, GNorm = 0.6283, lr_0 = 4.2254e-04
Loss = 1.2138e-01, PNorm = 76.1424, GNorm = 0.6221, lr_0 = 4.2225e-04
Loss = 9.6351e-02, PNorm = 76.1532, GNorm = 0.4484, lr_0 = 4.2196e-04
Loss = 1.0704e-01, PNorm = 76.1632, GNorm = 1.0563, lr_0 = 4.2167e-04
Loss = 1.1738e-01, PNorm = 76.1691, GNorm = 0.5433, lr_0 = 4.2138e-04
Loss = 1.1695e-01, PNorm = 76.1765, GNorm = 0.8842, lr_0 = 4.2109e-04
Loss = 1.1465e-01, PNorm = 76.1836, GNorm = 0.5503, lr_0 = 4.2080e-04
Loss = 1.2498e-01, PNorm = 76.1931, GNorm = 0.7529, lr_0 = 4.2051e-04
Loss = 1.3084e-01, PNorm = 76.2046, GNorm = 0.8205, lr_0 = 4.2023e-04
Loss = 1.2401e-01, PNorm = 76.2222, GNorm = 1.5748, lr_0 = 4.1994e-04
Loss = 1.2871e-01, PNorm = 76.2383, GNorm = 0.9436, lr_0 = 4.1965e-04
Loss = 1.2017e-01, PNorm = 76.2539, GNorm = 0.6464, lr_0 = 4.1936e-04
Loss = 1.1867e-01, PNorm = 76.2643, GNorm = 0.5286, lr_0 = 4.1907e-04
Loss = 1.1730e-01, PNorm = 76.2782, GNorm = 0.5345, lr_0 = 4.1879e-04
Loss = 1.1552e-01, PNorm = 76.2889, GNorm = 0.8627, lr_0 = 4.1850e-04
Loss = 1.1988e-01, PNorm = 76.2985, GNorm = 0.4975, lr_0 = 4.1821e-04
Loss = 1.2815e-01, PNorm = 76.3091, GNorm = 0.8624, lr_0 = 4.1793e-04
Loss = 1.1251e-01, PNorm = 76.3147, GNorm = 0.5983, lr_0 = 4.1764e-04
Loss = 1.1429e-01, PNorm = 76.3252, GNorm = 0.6480, lr_0 = 4.1736e-04
Loss = 1.1904e-01, PNorm = 76.3356, GNorm = 0.9344, lr_0 = 4.1707e-04
Loss = 1.3492e-01, PNorm = 76.3477, GNorm = 0.7869, lr_0 = 4.1678e-04
Loss = 1.1878e-01, PNorm = 76.3594, GNorm = 0.5832, lr_0 = 4.1650e-04
Loss = 1.0911e-01, PNorm = 76.3661, GNorm = 0.5067, lr_0 = 4.1621e-04
Loss = 1.2730e-01, PNorm = 76.3722, GNorm = 1.2390, lr_0 = 4.1593e-04
Loss = 1.3890e-01, PNorm = 76.3813, GNorm = 0.7698, lr_0 = 4.1564e-04
Loss = 1.3435e-01, PNorm = 76.3934, GNorm = 0.7615, lr_0 = 4.1536e-04
Loss = 1.1905e-01, PNorm = 76.4061, GNorm = 0.5504, lr_0 = 4.1507e-04
Loss = 1.4635e-01, PNorm = 76.4138, GNorm = 0.6499, lr_0 = 4.1479e-04
Loss = 1.2610e-01, PNorm = 76.4235, GNorm = 0.5683, lr_0 = 4.1450e-04
Loss = 1.1614e-01, PNorm = 76.4347, GNorm = 0.6328, lr_0 = 4.1422e-04
Loss = 1.2810e-01, PNorm = 76.4447, GNorm = 0.6843, lr_0 = 4.1394e-04
Loss = 1.3792e-01, PNorm = 76.4576, GNorm = 2.0574, lr_0 = 4.1365e-04
Loss = 1.2127e-01, PNorm = 76.4739, GNorm = 0.6526, lr_0 = 4.1337e-04
Loss = 1.3782e-01, PNorm = 76.4873, GNorm = 0.9181, lr_0 = 4.1309e-04
Loss = 1.0785e-01, PNorm = 76.5021, GNorm = 1.3654, lr_0 = 4.1280e-04
Loss = 1.2538e-01, PNorm = 76.5148, GNorm = 0.7143, lr_0 = 4.1252e-04
Loss = 1.1605e-01, PNorm = 76.5293, GNorm = 0.7184, lr_0 = 4.1224e-04
Loss = 1.2955e-01, PNorm = 76.5413, GNorm = 0.9294, lr_0 = 4.1196e-04
Loss = 1.2405e-01, PNorm = 76.5469, GNorm = 0.8079, lr_0 = 4.1167e-04
Loss = 1.2013e-01, PNorm = 76.5577, GNorm = 0.6217, lr_0 = 4.1139e-04
Loss = 1.1980e-01, PNorm = 76.5706, GNorm = 0.8903, lr_0 = 4.1111e-04
Loss = 1.1979e-01, PNorm = 76.5769, GNorm = 0.9863, lr_0 = 4.1083e-04
Loss = 1.2781e-01, PNorm = 76.5794, GNorm = 0.9205, lr_0 = 4.1055e-04
Loss = 1.3181e-01, PNorm = 76.5899, GNorm = 0.7908, lr_0 = 4.1027e-04
Loss = 1.1880e-01, PNorm = 76.6002, GNorm = 0.4816, lr_0 = 4.0998e-04
Loss = 1.2951e-01, PNorm = 76.6086, GNorm = 0.6241, lr_0 = 4.0970e-04
Loss = 1.3354e-01, PNorm = 76.6226, GNorm = 0.4791, lr_0 = 4.0942e-04
Loss = 1.1485e-01, PNorm = 76.6308, GNorm = 1.4180, lr_0 = 4.0914e-04
Loss = 1.3274e-01, PNorm = 76.6485, GNorm = 0.6287, lr_0 = 4.0886e-04
Loss = 1.3100e-01, PNorm = 76.6631, GNorm = 1.0153, lr_0 = 4.0858e-04
Loss = 1.1128e-01, PNorm = 76.6734, GNorm = 0.7050, lr_0 = 4.0830e-04
Loss = 1.1746e-01, PNorm = 76.6864, GNorm = 0.8791, lr_0 = 4.0802e-04
Loss = 1.1999e-01, PNorm = 76.6980, GNorm = 0.5746, lr_0 = 4.0774e-04
Loss = 1.2470e-01, PNorm = 76.7084, GNorm = 0.9040, lr_0 = 4.0746e-04
Loss = 1.6050e-01, PNorm = 76.7229, GNorm = 0.6810, lr_0 = 4.0718e-04
Loss = 1.1735e-01, PNorm = 76.7334, GNorm = 0.5278, lr_0 = 4.0691e-04
Loss = 1.2813e-01, PNorm = 76.7457, GNorm = 0.7767, lr_0 = 4.0663e-04
Loss = 1.1543e-01, PNorm = 76.7582, GNorm = 1.0680, lr_0 = 4.0635e-04
Loss = 1.2881e-01, PNorm = 76.7616, GNorm = 0.6893, lr_0 = 4.0607e-04
Loss = 1.2450e-01, PNorm = 76.7705, GNorm = 1.2886, lr_0 = 4.0579e-04
Loss = 1.3153e-01, PNorm = 76.7806, GNorm = 0.8405, lr_0 = 4.0551e-04
Loss = 1.1885e-01, PNorm = 76.7929, GNorm = 0.4980, lr_0 = 4.0524e-04
Loss = 1.2229e-01, PNorm = 76.8007, GNorm = 0.7184, lr_0 = 4.0496e-04
Loss = 1.1700e-01, PNorm = 76.8088, GNorm = 0.5746, lr_0 = 4.0468e-04
Validation mae = 0.235563
Epoch 13
Loss = 1.0609e-01, PNorm = 76.8213, GNorm = 1.0191, lr_0 = 4.0440e-04
Loss = 1.0379e-01, PNorm = 76.8319, GNorm = 0.6792, lr_0 = 4.0413e-04
Loss = 1.0357e-01, PNorm = 76.8406, GNorm = 0.5563, lr_0 = 4.0385e-04
Loss = 1.1326e-01, PNorm = 76.8503, GNorm = 0.6137, lr_0 = 4.0357e-04
Loss = 1.1354e-01, PNorm = 76.8593, GNorm = 0.6077, lr_0 = 4.0330e-04
Loss = 1.2947e-01, PNorm = 76.8729, GNorm = 0.6129, lr_0 = 4.0302e-04
Loss = 1.2885e-01, PNorm = 76.8819, GNorm = 0.5408, lr_0 = 4.0274e-04
Loss = 1.1913e-01, PNorm = 76.8948, GNorm = 0.9160, lr_0 = 4.0247e-04
Loss = 1.1742e-01, PNorm = 76.9034, GNorm = 0.5193, lr_0 = 4.0219e-04
Loss = 1.1621e-01, PNorm = 76.9187, GNorm = 1.3529, lr_0 = 4.0192e-04
Loss = 1.1960e-01, PNorm = 76.9281, GNorm = 0.6588, lr_0 = 4.0164e-04
Loss = 1.0788e-01, PNorm = 76.9368, GNorm = 0.5935, lr_0 = 4.0137e-04
Loss = 1.1318e-01, PNorm = 76.9462, GNorm = 0.9827, lr_0 = 4.0109e-04
Loss = 1.4067e-01, PNorm = 76.9557, GNorm = 0.5455, lr_0 = 4.0082e-04
Loss = 1.1713e-01, PNorm = 76.9681, GNorm = 0.6725, lr_0 = 4.0054e-04
Loss = 1.0610e-01, PNorm = 76.9780, GNorm = 0.4350, lr_0 = 4.0027e-04
Loss = 1.1851e-01, PNorm = 76.9889, GNorm = 0.7512, lr_0 = 3.9999e-04
Loss = 1.0404e-01, PNorm = 76.9984, GNorm = 0.9523, lr_0 = 3.9972e-04
Loss = 1.3420e-01, PNorm = 77.0095, GNorm = 1.1773, lr_0 = 3.9945e-04
Loss = 1.1473e-01, PNorm = 77.0220, GNorm = 0.5393, lr_0 = 3.9917e-04
Loss = 1.1661e-01, PNorm = 77.0338, GNorm = 0.5427, lr_0 = 3.9890e-04
Loss = 1.1793e-01, PNorm = 77.0476, GNorm = 0.9724, lr_0 = 3.9863e-04
Loss = 1.0949e-01, PNorm = 77.0634, GNorm = 0.6527, lr_0 = 3.9835e-04
Loss = 1.2577e-01, PNorm = 77.0718, GNorm = 0.5525, lr_0 = 3.9808e-04
Loss = 1.3020e-01, PNorm = 77.0789, GNorm = 0.6296, lr_0 = 3.9781e-04
Loss = 1.0727e-01, PNorm = 77.0865, GNorm = 0.6182, lr_0 = 3.9753e-04
Loss = 1.0954e-01, PNorm = 77.0983, GNorm = 0.5214, lr_0 = 3.9726e-04
Loss = 1.1972e-01, PNorm = 77.1070, GNorm = 0.5158, lr_0 = 3.9699e-04
Loss = 1.0928e-01, PNorm = 77.1190, GNorm = 1.0398, lr_0 = 3.9672e-04
Loss = 1.1563e-01, PNorm = 77.1293, GNorm = 0.5267, lr_0 = 3.9645e-04
Loss = 1.0620e-01, PNorm = 77.1359, GNorm = 0.7711, lr_0 = 3.9617e-04
Loss = 1.2936e-01, PNorm = 77.1470, GNorm = 0.6750, lr_0 = 3.9590e-04
Loss = 1.1767e-01, PNorm = 77.1595, GNorm = 0.6866, lr_0 = 3.9563e-04
Loss = 1.1886e-01, PNorm = 77.1727, GNorm = 0.8209, lr_0 = 3.9536e-04
Loss = 1.0177e-01, PNorm = 77.1856, GNorm = 0.7373, lr_0 = 3.9509e-04
Loss = 1.1394e-01, PNorm = 77.1992, GNorm = 0.8170, lr_0 = 3.9482e-04
Loss = 1.3435e-01, PNorm = 77.2099, GNorm = 0.7364, lr_0 = 3.9455e-04
Loss = 1.0467e-01, PNorm = 77.2222, GNorm = 0.8632, lr_0 = 3.9428e-04
Loss = 1.3784e-01, PNorm = 77.2338, GNorm = 0.5931, lr_0 = 3.9401e-04
Loss = 1.2679e-01, PNorm = 77.2468, GNorm = 1.3178, lr_0 = 3.9374e-04
Loss = 1.1108e-01, PNorm = 77.2560, GNorm = 0.7807, lr_0 = 3.9347e-04
Loss = 1.0958e-01, PNorm = 77.2659, GNorm = 0.5126, lr_0 = 3.9320e-04
Loss = 1.1972e-01, PNorm = 77.2755, GNorm = 0.6133, lr_0 = 3.9293e-04
Loss = 1.3072e-01, PNorm = 77.2823, GNorm = 0.7352, lr_0 = 3.9266e-04
Loss = 1.3355e-01, PNorm = 77.2915, GNorm = 0.8565, lr_0 = 3.9239e-04
Loss = 1.1353e-01, PNorm = 77.3007, GNorm = 0.6352, lr_0 = 3.9212e-04
Loss = 1.1697e-01, PNorm = 77.3099, GNorm = 0.6273, lr_0 = 3.9185e-04
Loss = 1.1769e-01, PNorm = 77.3192, GNorm = 0.7615, lr_0 = 3.9159e-04
Loss = 1.1915e-01, PNorm = 77.3273, GNorm = 0.5841, lr_0 = 3.9132e-04
Loss = 1.1443e-01, PNorm = 77.3400, GNorm = 0.6849, lr_0 = 3.9105e-04
Loss = 1.1690e-01, PNorm = 77.3495, GNorm = 0.6473, lr_0 = 3.9078e-04
Loss = 1.1999e-01, PNorm = 77.3627, GNorm = 1.8406, lr_0 = 3.9051e-04
Loss = 1.2655e-01, PNorm = 77.3704, GNorm = 0.6278, lr_0 = 3.9025e-04
Loss = 1.1521e-01, PNorm = 77.3785, GNorm = 0.6445, lr_0 = 3.8998e-04
Loss = 1.1967e-01, PNorm = 77.3918, GNorm = 0.9067, lr_0 = 3.8971e-04
Loss = 1.0786e-01, PNorm = 77.3998, GNorm = 0.6765, lr_0 = 3.8945e-04
Loss = 1.1116e-01, PNorm = 77.4059, GNorm = 0.7384, lr_0 = 3.8918e-04
Loss = 1.3022e-01, PNorm = 77.4104, GNorm = 0.9810, lr_0 = 3.8891e-04
Loss = 1.2476e-01, PNorm = 77.4214, GNorm = 0.7237, lr_0 = 3.8865e-04
Loss = 1.2009e-01, PNorm = 77.4315, GNorm = 0.7930, lr_0 = 3.8838e-04
Loss = 1.2090e-01, PNorm = 77.4448, GNorm = 0.7046, lr_0 = 3.8811e-04
Loss = 1.2364e-01, PNorm = 77.4554, GNorm = 0.6736, lr_0 = 3.8785e-04
Loss = 1.1404e-01, PNorm = 77.4695, GNorm = 1.1585, lr_0 = 3.8758e-04
Loss = 1.2554e-01, PNorm = 77.4776, GNorm = 0.6890, lr_0 = 3.8732e-04
Loss = 1.1958e-01, PNorm = 77.4865, GNorm = 0.4989, lr_0 = 3.8705e-04
Loss = 1.1377e-01, PNorm = 77.4943, GNorm = 0.6391, lr_0 = 3.8679e-04
Loss = 1.0821e-01, PNorm = 77.5007, GNorm = 0.8273, lr_0 = 3.8652e-04
Loss = 1.0912e-01, PNorm = 77.5085, GNorm = 0.6753, lr_0 = 3.8626e-04
Loss = 1.1256e-01, PNorm = 77.5193, GNorm = 0.6070, lr_0 = 3.8599e-04
Loss = 1.2298e-01, PNorm = 77.5316, GNorm = 0.5731, lr_0 = 3.8573e-04
Loss = 1.3011e-01, PNorm = 77.5403, GNorm = 0.9689, lr_0 = 3.8546e-04
Loss = 1.1893e-01, PNorm = 77.5541, GNorm = 0.5596, lr_0 = 3.8520e-04
Loss = 1.1847e-01, PNorm = 77.5631, GNorm = 0.8435, lr_0 = 3.8493e-04
Loss = 1.2385e-01, PNorm = 77.5721, GNorm = 0.6673, lr_0 = 3.8467e-04
Loss = 1.2267e-01, PNorm = 77.5819, GNorm = 0.6463, lr_0 = 3.8441e-04
Loss = 1.2336e-01, PNorm = 77.5910, GNorm = 0.7973, lr_0 = 3.8414e-04
Loss = 1.2286e-01, PNorm = 77.5989, GNorm = 0.9574, lr_0 = 3.8388e-04
Loss = 1.2032e-01, PNorm = 77.6100, GNorm = 0.4563, lr_0 = 3.8362e-04
Loss = 1.0698e-01, PNorm = 77.6187, GNorm = 0.5824, lr_0 = 3.8336e-04
Loss = 1.1183e-01, PNorm = 77.6251, GNorm = 0.8632, lr_0 = 3.8309e-04
Loss = 1.0812e-01, PNorm = 77.6334, GNorm = 0.8533, lr_0 = 3.8283e-04
Loss = 1.1219e-01, PNorm = 77.6381, GNorm = 0.6737, lr_0 = 3.8257e-04
Loss = 1.1600e-01, PNorm = 77.6495, GNorm = 0.5927, lr_0 = 3.8231e-04
Loss = 1.2000e-01, PNorm = 77.6572, GNorm = 0.6632, lr_0 = 3.8204e-04
Loss = 1.1483e-01, PNorm = 77.6672, GNorm = 0.5763, lr_0 = 3.8178e-04
Loss = 1.3581e-01, PNorm = 77.6795, GNorm = 0.6729, lr_0 = 3.8152e-04
Loss = 1.0614e-01, PNorm = 77.6888, GNorm = 0.7948, lr_0 = 3.8126e-04
Loss = 1.1855e-01, PNorm = 77.7007, GNorm = 0.9210, lr_0 = 3.8100e-04
Loss = 1.0928e-01, PNorm = 77.7105, GNorm = 0.6919, lr_0 = 3.8074e-04
Loss = 1.0771e-01, PNorm = 77.7199, GNorm = 1.0638, lr_0 = 3.8048e-04
Loss = 1.1403e-01, PNorm = 77.7284, GNorm = 0.5816, lr_0 = 3.8022e-04
Loss = 1.1773e-01, PNorm = 77.7382, GNorm = 0.7127, lr_0 = 3.7995e-04
Loss = 1.0253e-01, PNorm = 77.7443, GNorm = 0.7790, lr_0 = 3.7969e-04
Loss = 1.1493e-01, PNorm = 77.7512, GNorm = 0.5484, lr_0 = 3.7943e-04
Loss = 1.0730e-01, PNorm = 77.7565, GNorm = 0.5458, lr_0 = 3.7917e-04
Loss = 1.1943e-01, PNorm = 77.7623, GNorm = 0.6896, lr_0 = 3.7891e-04
Loss = 1.2751e-01, PNorm = 77.7700, GNorm = 0.8938, lr_0 = 3.7866e-04
Loss = 1.1943e-01, PNorm = 77.7800, GNorm = 0.5508, lr_0 = 3.7840e-04
Loss = 1.1890e-01, PNorm = 77.7955, GNorm = 0.5745, lr_0 = 3.7814e-04
Loss = 1.1659e-01, PNorm = 77.8094, GNorm = 0.7913, lr_0 = 3.7788e-04
Loss = 1.2806e-01, PNorm = 77.8236, GNorm = 0.6394, lr_0 = 3.7762e-04
Loss = 1.0567e-01, PNorm = 77.8341, GNorm = 0.7078, lr_0 = 3.7736e-04
Loss = 1.1173e-01, PNorm = 77.8409, GNorm = 0.6521, lr_0 = 3.7710e-04
Loss = 1.1665e-01, PNorm = 77.8493, GNorm = 0.5592, lr_0 = 3.7684e-04
Loss = 1.1566e-01, PNorm = 77.8604, GNorm = 0.7699, lr_0 = 3.7659e-04
Loss = 1.1087e-01, PNorm = 77.8721, GNorm = 0.8758, lr_0 = 3.7633e-04
Loss = 1.3677e-01, PNorm = 77.8829, GNorm = 0.6899, lr_0 = 3.7607e-04
Loss = 1.3530e-01, PNorm = 77.8920, GNorm = 0.7235, lr_0 = 3.7581e-04
Loss = 1.2518e-01, PNorm = 77.8984, GNorm = 0.6292, lr_0 = 3.7555e-04
Loss = 1.3544e-01, PNorm = 77.9110, GNorm = 0.5793, lr_0 = 3.7530e-04
Loss = 1.1795e-01, PNorm = 77.9239, GNorm = 0.9079, lr_0 = 3.7504e-04
Loss = 1.2154e-01, PNorm = 77.9383, GNorm = 0.6433, lr_0 = 3.7478e-04
Loss = 1.3715e-01, PNorm = 77.9514, GNorm = 0.6136, lr_0 = 3.7453e-04
Loss = 1.0879e-01, PNorm = 77.9641, GNorm = 1.1696, lr_0 = 3.7427e-04
Loss = 1.1782e-01, PNorm = 77.9679, GNorm = 0.7573, lr_0 = 3.7401e-04
Loss = 1.4546e-01, PNorm = 77.9720, GNorm = 1.0270, lr_0 = 3.7376e-04
Loss = 1.2095e-01, PNorm = 77.9803, GNorm = 0.5284, lr_0 = 3.7350e-04
Loss = 1.3635e-01, PNorm = 77.9867, GNorm = 0.6664, lr_0 = 3.7325e-04
Loss = 1.0513e-01, PNorm = 78.0003, GNorm = 0.6281, lr_0 = 3.7299e-04
Loss = 1.2278e-01, PNorm = 78.0103, GNorm = 0.7668, lr_0 = 3.7273e-04
Validation mae = 0.233832
Epoch 14
Loss = 1.0394e-01, PNorm = 78.0213, GNorm = 0.5683, lr_0 = 3.7248e-04
Loss = 1.0497e-01, PNorm = 78.0316, GNorm = 0.3966, lr_0 = 3.7222e-04
Loss = 9.5527e-02, PNorm = 78.0403, GNorm = 0.5481, lr_0 = 3.7197e-04
Loss = 1.0720e-01, PNorm = 78.0476, GNorm = 0.7616, lr_0 = 3.7171e-04
Loss = 1.1887e-01, PNorm = 78.0572, GNorm = 0.5759, lr_0 = 3.7146e-04
Loss = 1.0292e-01, PNorm = 78.0678, GNorm = 0.4921, lr_0 = 3.7120e-04
Loss = 1.0252e-01, PNorm = 78.0731, GNorm = 0.5647, lr_0 = 3.7095e-04
Loss = 1.0921e-01, PNorm = 78.0803, GNorm = 0.5131, lr_0 = 3.7070e-04
Loss = 1.2348e-01, PNorm = 78.0866, GNorm = 0.7442, lr_0 = 3.7044e-04
Loss = 1.1237e-01, PNorm = 78.0951, GNorm = 0.9406, lr_0 = 3.7019e-04
Loss = 9.9781e-02, PNorm = 78.1034, GNorm = 0.6795, lr_0 = 3.6993e-04
Loss = 1.0528e-01, PNorm = 78.1119, GNorm = 0.6758, lr_0 = 3.6968e-04
Loss = 1.0388e-01, PNorm = 78.1171, GNorm = 0.6984, lr_0 = 3.6943e-04
Loss = 9.3854e-02, PNorm = 78.1233, GNorm = 0.6536, lr_0 = 3.6917e-04
Loss = 1.1681e-01, PNorm = 78.1326, GNorm = 0.7501, lr_0 = 3.6892e-04
Loss = 1.0095e-01, PNorm = 78.1390, GNorm = 0.5754, lr_0 = 3.6867e-04
Loss = 1.1128e-01, PNorm = 78.1478, GNorm = 1.1121, lr_0 = 3.6842e-04
Loss = 1.0800e-01, PNorm = 78.1574, GNorm = 0.8640, lr_0 = 3.6816e-04
Loss = 1.1946e-01, PNorm = 78.1663, GNorm = 1.3053, lr_0 = 3.6791e-04
Loss = 1.1366e-01, PNorm = 78.1751, GNorm = 0.6554, lr_0 = 3.6766e-04
Loss = 1.0695e-01, PNorm = 78.1870, GNorm = 0.6882, lr_0 = 3.6741e-04
Loss = 1.1926e-01, PNorm = 78.2024, GNorm = 0.7078, lr_0 = 3.6716e-04
Loss = 1.0451e-01, PNorm = 78.2169, GNorm = 0.4774, lr_0 = 3.6690e-04
Loss = 1.0285e-01, PNorm = 78.2284, GNorm = 0.6081, lr_0 = 3.6665e-04
Loss = 1.1885e-01, PNorm = 78.2411, GNorm = 0.5620, lr_0 = 3.6640e-04
Loss = 1.0875e-01, PNorm = 78.2488, GNorm = 0.9054, lr_0 = 3.6615e-04
Loss = 1.1558e-01, PNorm = 78.2553, GNorm = 0.7779, lr_0 = 3.6590e-04
Loss = 1.1781e-01, PNorm = 78.2661, GNorm = 0.6310, lr_0 = 3.6565e-04
Loss = 1.2070e-01, PNorm = 78.2739, GNorm = 0.8537, lr_0 = 3.6540e-04
Loss = 1.3313e-01, PNorm = 78.2837, GNorm = 0.6090, lr_0 = 3.6515e-04
Loss = 1.1043e-01, PNorm = 78.2967, GNorm = 1.3558, lr_0 = 3.6490e-04
Loss = 1.3063e-01, PNorm = 78.3033, GNorm = 1.1108, lr_0 = 3.6465e-04
Loss = 1.0340e-01, PNorm = 78.3151, GNorm = 0.6261, lr_0 = 3.6440e-04
Loss = 1.2737e-01, PNorm = 78.3278, GNorm = 0.7575, lr_0 = 3.6415e-04
Loss = 1.1531e-01, PNorm = 78.3362, GNorm = 1.0507, lr_0 = 3.6390e-04
Loss = 1.0585e-01, PNorm = 78.3453, GNorm = 0.5778, lr_0 = 3.6365e-04
Loss = 1.0191e-01, PNorm = 78.3515, GNorm = 0.6095, lr_0 = 3.6340e-04
Loss = 1.1581e-01, PNorm = 78.3601, GNorm = 0.6489, lr_0 = 3.6315e-04
Loss = 1.0691e-01, PNorm = 78.3678, GNorm = 0.7473, lr_0 = 3.6290e-04
Loss = 1.1667e-01, PNorm = 78.3777, GNorm = 0.4608, lr_0 = 3.6266e-04
Loss = 9.9229e-02, PNorm = 78.3889, GNorm = 0.8054, lr_0 = 3.6241e-04
Loss = 1.3847e-01, PNorm = 78.4014, GNorm = 0.6815, lr_0 = 3.6216e-04
Loss = 1.0833e-01, PNorm = 78.4153, GNorm = 0.4960, lr_0 = 3.6191e-04
Loss = 1.0142e-01, PNorm = 78.4248, GNorm = 0.8099, lr_0 = 3.6166e-04
Loss = 1.0426e-01, PNorm = 78.4362, GNorm = 0.5601, lr_0 = 3.6141e-04
Loss = 1.1406e-01, PNorm = 78.4447, GNorm = 0.5024, lr_0 = 3.6117e-04
Loss = 1.4287e-01, PNorm = 78.4554, GNorm = 1.8977, lr_0 = 3.6092e-04
Loss = 9.8209e-02, PNorm = 78.4649, GNorm = 0.7173, lr_0 = 3.6067e-04
Loss = 1.1713e-01, PNorm = 78.4733, GNorm = 0.6863, lr_0 = 3.6043e-04
Loss = 1.1662e-01, PNorm = 78.4851, GNorm = 0.7063, lr_0 = 3.6018e-04
Loss = 1.3253e-01, PNorm = 78.4961, GNorm = 0.5469, lr_0 = 3.5993e-04
Loss = 1.0367e-01, PNorm = 78.5045, GNorm = 1.0388, lr_0 = 3.5969e-04
Loss = 1.0619e-01, PNorm = 78.5129, GNorm = 0.8255, lr_0 = 3.5944e-04
Loss = 1.1943e-01, PNorm = 78.5158, GNorm = 1.1195, lr_0 = 3.5919e-04
Loss = 1.1172e-01, PNorm = 78.5222, GNorm = 0.6629, lr_0 = 3.5895e-04
Loss = 1.2365e-01, PNorm = 78.5359, GNorm = 0.5564, lr_0 = 3.5870e-04
Loss = 1.1987e-01, PNorm = 78.5486, GNorm = 0.8524, lr_0 = 3.5845e-04
Loss = 1.0049e-01, PNorm = 78.5564, GNorm = 0.7997, lr_0 = 3.5821e-04
Loss = 1.2209e-01, PNorm = 78.5665, GNorm = 0.6608, lr_0 = 3.5796e-04
Loss = 1.1807e-01, PNorm = 78.5702, GNorm = 0.5402, lr_0 = 3.5772e-04
Loss = 1.0587e-01, PNorm = 78.5781, GNorm = 0.7707, lr_0 = 3.5747e-04
Loss = 1.1311e-01, PNorm = 78.5823, GNorm = 0.6504, lr_0 = 3.5723e-04
Loss = 1.0271e-01, PNorm = 78.5924, GNorm = 0.4768, lr_0 = 3.5698e-04
Loss = 1.1439e-01, PNorm = 78.6045, GNorm = 0.5009, lr_0 = 3.5674e-04
Loss = 1.0004e-01, PNorm = 78.6148, GNorm = 0.5589, lr_0 = 3.5650e-04
Loss = 1.0278e-01, PNorm = 78.6189, GNorm = 0.9270, lr_0 = 3.5625e-04
Loss = 1.0602e-01, PNorm = 78.6222, GNorm = 0.6863, lr_0 = 3.5601e-04
Loss = 1.1505e-01, PNorm = 78.6279, GNorm = 1.0418, lr_0 = 3.5576e-04
Loss = 1.2409e-01, PNorm = 78.6390, GNorm = 0.6272, lr_0 = 3.5552e-04
Loss = 1.0812e-01, PNorm = 78.6512, GNorm = 0.8895, lr_0 = 3.5528e-04
Loss = 1.0825e-01, PNorm = 78.6552, GNorm = 0.6471, lr_0 = 3.5503e-04
Loss = 1.2898e-01, PNorm = 78.6655, GNorm = 0.8678, lr_0 = 3.5479e-04
Loss = 1.1796e-01, PNorm = 78.6717, GNorm = 0.5930, lr_0 = 3.5455e-04
Loss = 1.2162e-01, PNorm = 78.6791, GNorm = 0.5216, lr_0 = 3.5430e-04
Loss = 1.3977e-01, PNorm = 78.6839, GNorm = 0.9223, lr_0 = 3.5406e-04
Loss = 1.2744e-01, PNorm = 78.6961, GNorm = 0.6435, lr_0 = 3.5382e-04
Loss = 1.1388e-01, PNorm = 78.7029, GNorm = 0.7200, lr_0 = 3.5358e-04
Loss = 1.1839e-01, PNorm = 78.7093, GNorm = 0.6223, lr_0 = 3.5333e-04
Loss = 1.2869e-01, PNorm = 78.7198, GNorm = 0.5860, lr_0 = 3.5309e-04
Loss = 9.8947e-02, PNorm = 78.7276, GNorm = 0.9342, lr_0 = 3.5285e-04
Loss = 1.0688e-01, PNorm = 78.7354, GNorm = 0.6655, lr_0 = 3.5261e-04
Loss = 1.3483e-01, PNorm = 78.7385, GNorm = 0.6325, lr_0 = 3.5237e-04
Loss = 1.2713e-01, PNorm = 78.7452, GNorm = 0.5615, lr_0 = 3.5212e-04
Loss = 1.0634e-01, PNorm = 78.7531, GNorm = 0.8253, lr_0 = 3.5188e-04
Loss = 1.1562e-01, PNorm = 78.7604, GNorm = 0.6106, lr_0 = 3.5164e-04
Loss = 1.1265e-01, PNorm = 78.7669, GNorm = 0.7801, lr_0 = 3.5140e-04
Loss = 9.9707e-02, PNorm = 78.7737, GNorm = 0.8486, lr_0 = 3.5116e-04
Loss = 1.2113e-01, PNorm = 78.7742, GNorm = 0.5421, lr_0 = 3.5092e-04
Loss = 1.1382e-01, PNorm = 78.7789, GNorm = 0.5526, lr_0 = 3.5068e-04
Loss = 1.0438e-01, PNorm = 78.7873, GNorm = 0.6868, lr_0 = 3.5044e-04
Loss = 1.1248e-01, PNorm = 78.7956, GNorm = 0.9108, lr_0 = 3.5020e-04
Loss = 1.0732e-01, PNorm = 78.8054, GNorm = 0.6855, lr_0 = 3.4996e-04
Loss = 9.9162e-02, PNorm = 78.8184, GNorm = 0.6883, lr_0 = 3.4972e-04
Loss = 1.1326e-01, PNorm = 78.8253, GNorm = 0.6758, lr_0 = 3.4948e-04
Loss = 9.6435e-02, PNorm = 78.8287, GNorm = 0.5617, lr_0 = 3.4924e-04
Loss = 1.1344e-01, PNorm = 78.8343, GNorm = 1.1832, lr_0 = 3.4900e-04
Loss = 1.2375e-01, PNorm = 78.8383, GNorm = 0.8822, lr_0 = 3.4876e-04
Loss = 1.2069e-01, PNorm = 78.8472, GNorm = 0.6580, lr_0 = 3.4852e-04
Loss = 9.9054e-02, PNorm = 78.8609, GNorm = 0.7390, lr_0 = 3.4828e-04
Loss = 1.0285e-01, PNorm = 78.8669, GNorm = 0.8067, lr_0 = 3.4805e-04
Loss = 1.2073e-01, PNorm = 78.8721, GNorm = 1.0348, lr_0 = 3.4781e-04
Loss = 1.0274e-01, PNorm = 78.8801, GNorm = 0.9690, lr_0 = 3.4757e-04
Loss = 1.2280e-01, PNorm = 78.8857, GNorm = 0.8191, lr_0 = 3.4733e-04
Loss = 1.1788e-01, PNorm = 78.8971, GNorm = 0.6684, lr_0 = 3.4709e-04
Loss = 1.0938e-01, PNorm = 78.9027, GNorm = 1.1739, lr_0 = 3.4686e-04
Loss = 9.6703e-02, PNorm = 78.9112, GNorm = 0.5567, lr_0 = 3.4662e-04
Loss = 1.2415e-01, PNorm = 78.9160, GNorm = 0.6302, lr_0 = 3.4638e-04
Loss = 1.1504e-01, PNorm = 78.9191, GNorm = 0.7061, lr_0 = 3.4614e-04
Loss = 1.0143e-01, PNorm = 78.9266, GNorm = 0.4419, lr_0 = 3.4591e-04
Loss = 1.1430e-01, PNorm = 78.9308, GNorm = 0.6195, lr_0 = 3.4567e-04
Loss = 1.1564e-01, PNorm = 78.9386, GNorm = 0.6291, lr_0 = 3.4543e-04
Loss = 1.0046e-01, PNorm = 78.9509, GNorm = 0.8734, lr_0 = 3.4520e-04
Loss = 1.1914e-01, PNorm = 78.9613, GNorm = 0.5169, lr_0 = 3.4496e-04
Loss = 1.2568e-01, PNorm = 78.9657, GNorm = 0.7540, lr_0 = 3.4472e-04
Loss = 1.2606e-01, PNorm = 78.9713, GNorm = 0.8311, lr_0 = 3.4449e-04
Loss = 1.2364e-01, PNorm = 78.9845, GNorm = 1.0819, lr_0 = 3.4425e-04
Loss = 1.2024e-01, PNorm = 78.9933, GNorm = 0.5952, lr_0 = 3.4402e-04
Loss = 1.0878e-01, PNorm = 79.0016, GNorm = 0.8262, lr_0 = 3.4378e-04
Loss = 1.1309e-01, PNorm = 79.0099, GNorm = 0.7646, lr_0 = 3.4354e-04
Loss = 1.1085e-01, PNorm = 79.0183, GNorm = 0.7266, lr_0 = 3.4331e-04
Validation mae = 0.241595
Epoch 15
Loss = 1.0623e-01, PNorm = 79.0218, GNorm = 0.8537, lr_0 = 3.4307e-04
Loss = 1.2894e-01, PNorm = 79.0280, GNorm = 0.7493, lr_0 = 3.4284e-04
Loss = 1.0293e-01, PNorm = 79.0416, GNorm = 0.6768, lr_0 = 3.4260e-04
Loss = 1.2469e-01, PNorm = 79.0487, GNorm = 0.6945, lr_0 = 3.4237e-04
Loss = 1.0711e-01, PNorm = 79.0603, GNorm = 1.2600, lr_0 = 3.4213e-04
Loss = 1.0100e-01, PNorm = 79.0632, GNorm = 0.6233, lr_0 = 3.4190e-04
Loss = 1.0574e-01, PNorm = 79.0696, GNorm = 0.6025, lr_0 = 3.4167e-04
Loss = 1.0332e-01, PNorm = 79.0779, GNorm = 0.4931, lr_0 = 3.4143e-04
Loss = 1.0359e-01, PNorm = 79.0805, GNorm = 0.8182, lr_0 = 3.4120e-04
Loss = 1.0726e-01, PNorm = 79.0886, GNorm = 0.7137, lr_0 = 3.4096e-04
Loss = 1.0322e-01, PNorm = 79.0953, GNorm = 0.6108, lr_0 = 3.4073e-04
Loss = 9.6447e-02, PNorm = 79.1042, GNorm = 0.4626, lr_0 = 3.4050e-04
Loss = 9.5240e-02, PNorm = 79.1134, GNorm = 0.6427, lr_0 = 3.4026e-04
Loss = 1.1265e-01, PNorm = 79.1210, GNorm = 0.7840, lr_0 = 3.4003e-04
Loss = 1.1876e-01, PNorm = 79.1267, GNorm = 0.6659, lr_0 = 3.3980e-04
Loss = 1.2413e-01, PNorm = 79.1356, GNorm = 0.5924, lr_0 = 3.3956e-04
Loss = 1.0749e-01, PNorm = 79.1431, GNorm = 0.5943, lr_0 = 3.3933e-04
Loss = 1.2671e-01, PNorm = 79.1513, GNorm = 0.5683, lr_0 = 3.3910e-04
Loss = 1.0969e-01, PNorm = 79.1582, GNorm = 0.8842, lr_0 = 3.3887e-04
Loss = 1.2280e-01, PNorm = 79.1683, GNorm = 0.5808, lr_0 = 3.3864e-04
Loss = 1.2208e-01, PNorm = 79.1821, GNorm = 0.6272, lr_0 = 3.3840e-04
Loss = 1.0139e-01, PNorm = 79.1941, GNorm = 0.6573, lr_0 = 3.3817e-04
Loss = 1.1438e-01, PNorm = 79.2032, GNorm = 0.6001, lr_0 = 3.3794e-04
Loss = 8.3468e-02, PNorm = 79.2075, GNorm = 0.4488, lr_0 = 3.3771e-04
Loss = 1.1572e-01, PNorm = 79.2158, GNorm = 0.8537, lr_0 = 3.3748e-04
Loss = 1.0960e-01, PNorm = 79.2261, GNorm = 0.5332, lr_0 = 3.3725e-04
Loss = 1.0785e-01, PNorm = 79.2377, GNorm = 0.7330, lr_0 = 3.3701e-04
Loss = 1.1760e-01, PNorm = 79.2486, GNorm = 0.7221, lr_0 = 3.3678e-04
Loss = 1.0194e-01, PNorm = 79.2563, GNorm = 0.7450, lr_0 = 3.3655e-04
Loss = 1.1058e-01, PNorm = 79.2648, GNorm = 0.5246, lr_0 = 3.3632e-04
Loss = 9.5045e-02, PNorm = 79.2745, GNorm = 0.9221, lr_0 = 3.3609e-04
Loss = 1.1428e-01, PNorm = 79.2825, GNorm = 0.6268, lr_0 = 3.3586e-04
Loss = 1.1500e-01, PNorm = 79.2915, GNorm = 0.7449, lr_0 = 3.3563e-04
Loss = 1.0266e-01, PNorm = 79.2986, GNorm = 0.5710, lr_0 = 3.3540e-04
Loss = 1.1114e-01, PNorm = 79.3056, GNorm = 0.7121, lr_0 = 3.3517e-04
Loss = 1.0276e-01, PNorm = 79.3128, GNorm = 0.6904, lr_0 = 3.3494e-04
Loss = 9.8908e-02, PNorm = 79.3207, GNorm = 0.5583, lr_0 = 3.3471e-04
Loss = 1.0664e-01, PNorm = 79.3275, GNorm = 0.7518, lr_0 = 3.3448e-04
Loss = 9.8664e-02, PNorm = 79.3374, GNorm = 0.7520, lr_0 = 3.3425e-04
Loss = 9.9835e-02, PNorm = 79.3463, GNorm = 0.7368, lr_0 = 3.3403e-04
Loss = 1.1929e-01, PNorm = 79.3523, GNorm = 0.8917, lr_0 = 3.3380e-04
Loss = 1.2116e-01, PNorm = 79.3578, GNorm = 0.8946, lr_0 = 3.3357e-04
Loss = 1.0559e-01, PNorm = 79.3647, GNorm = 0.7186, lr_0 = 3.3334e-04
Loss = 1.0251e-01, PNorm = 79.3711, GNorm = 0.6295, lr_0 = 3.3311e-04
Loss = 1.1781e-01, PNorm = 79.3809, GNorm = 0.7178, lr_0 = 3.3288e-04
Loss = 1.0683e-01, PNorm = 79.3866, GNorm = 1.8053, lr_0 = 3.3265e-04
Loss = 1.2110e-01, PNorm = 79.3902, GNorm = 0.7075, lr_0 = 3.3243e-04
Loss = 1.0988e-01, PNorm = 79.3988, GNorm = 0.5469, lr_0 = 3.3220e-04
Loss = 1.0409e-01, PNorm = 79.4111, GNorm = 0.5529, lr_0 = 3.3197e-04
Loss = 1.1212e-01, PNorm = 79.4193, GNorm = 0.5982, lr_0 = 3.3174e-04
Loss = 1.2267e-01, PNorm = 79.4271, GNorm = 0.7473, lr_0 = 3.3152e-04
Loss = 1.0352e-01, PNorm = 79.4308, GNorm = 0.4717, lr_0 = 3.3129e-04
Loss = 1.1822e-01, PNorm = 79.4393, GNorm = 0.6292, lr_0 = 3.3106e-04
Loss = 1.1083e-01, PNorm = 79.4465, GNorm = 0.7352, lr_0 = 3.3084e-04
Loss = 1.0207e-01, PNorm = 79.4556, GNorm = 0.5363, lr_0 = 3.3061e-04
Loss = 1.1038e-01, PNorm = 79.4662, GNorm = 0.8940, lr_0 = 3.3038e-04
Loss = 1.0661e-01, PNorm = 79.4771, GNorm = 0.7023, lr_0 = 3.3016e-04
Loss = 1.1787e-01, PNorm = 79.4851, GNorm = 1.3305, lr_0 = 3.2993e-04
Loss = 1.0995e-01, PNorm = 79.4956, GNorm = 0.5430, lr_0 = 3.2970e-04
Loss = 9.2689e-02, PNorm = 79.5027, GNorm = 0.5962, lr_0 = 3.2948e-04
Loss = 9.7310e-02, PNorm = 79.5090, GNorm = 1.0440, lr_0 = 3.2925e-04
Loss = 1.0003e-01, PNorm = 79.5154, GNorm = 0.8637, lr_0 = 3.2903e-04
Loss = 1.1637e-01, PNorm = 79.5194, GNorm = 0.5824, lr_0 = 3.2880e-04
Loss = 1.2514e-01, PNorm = 79.5248, GNorm = 0.8059, lr_0 = 3.2858e-04
Loss = 1.1387e-01, PNorm = 79.5284, GNorm = 0.6358, lr_0 = 3.2835e-04
Loss = 1.0393e-01, PNorm = 79.5352, GNorm = 0.8648, lr_0 = 3.2813e-04
Loss = 1.2518e-01, PNorm = 79.5400, GNorm = 0.8559, lr_0 = 3.2790e-04
Loss = 1.1389e-01, PNorm = 79.5455, GNorm = 0.9651, lr_0 = 3.2768e-04
Loss = 1.0625e-01, PNorm = 79.5547, GNorm = 0.7012, lr_0 = 3.2745e-04
Loss = 1.2698e-01, PNorm = 79.5648, GNorm = 1.0450, lr_0 = 3.2723e-04
Loss = 1.1458e-01, PNorm = 79.5760, GNorm = 0.8531, lr_0 = 3.2700e-04
Loss = 9.4183e-02, PNorm = 79.5843, GNorm = 0.5312, lr_0 = 3.2678e-04
Loss = 1.1348e-01, PNorm = 79.5890, GNorm = 0.6480, lr_0 = 3.2656e-04
Loss = 1.0638e-01, PNorm = 79.5968, GNorm = 0.6470, lr_0 = 3.2633e-04
Loss = 9.9709e-02, PNorm = 79.6011, GNorm = 0.9336, lr_0 = 3.2611e-04
Loss = 1.2201e-01, PNorm = 79.6059, GNorm = 0.8304, lr_0 = 3.2589e-04
Loss = 1.1496e-01, PNorm = 79.6133, GNorm = 0.6131, lr_0 = 3.2566e-04
Loss = 1.0395e-01, PNorm = 79.6211, GNorm = 0.5666, lr_0 = 3.2544e-04
Loss = 1.0352e-01, PNorm = 79.6297, GNorm = 0.5465, lr_0 = 3.2522e-04
Loss = 1.1352e-01, PNorm = 79.6382, GNorm = 0.7026, lr_0 = 3.2499e-04
Loss = 1.1390e-01, PNorm = 79.6484, GNorm = 0.7959, lr_0 = 3.2477e-04
Loss = 1.0494e-01, PNorm = 79.6579, GNorm = 0.6278, lr_0 = 3.2455e-04
Loss = 1.0207e-01, PNorm = 79.6652, GNorm = 0.9125, lr_0 = 3.2433e-04
Loss = 1.2255e-01, PNorm = 79.6716, GNorm = 0.6384, lr_0 = 3.2410e-04
Loss = 1.0230e-01, PNorm = 79.6800, GNorm = 0.7632, lr_0 = 3.2388e-04
Loss = 1.0064e-01, PNorm = 79.6855, GNorm = 0.7314, lr_0 = 3.2366e-04
Loss = 1.2064e-01, PNorm = 79.6941, GNorm = 0.6611, lr_0 = 3.2344e-04
Loss = 9.6471e-02, PNorm = 79.6994, GNorm = 0.6130, lr_0 = 3.2322e-04
Loss = 1.0583e-01, PNorm = 79.7043, GNorm = 0.5697, lr_0 = 3.2300e-04
Loss = 1.1392e-01, PNorm = 79.7136, GNorm = 0.9462, lr_0 = 3.2277e-04
Loss = 1.1284e-01, PNorm = 79.7188, GNorm = 0.7646, lr_0 = 3.2255e-04
Loss = 9.4248e-02, PNorm = 79.7270, GNorm = 0.7185, lr_0 = 3.2233e-04
Loss = 1.0666e-01, PNorm = 79.7313, GNorm = 0.6017, lr_0 = 3.2211e-04
Loss = 1.1321e-01, PNorm = 79.7345, GNorm = 0.5025, lr_0 = 3.2189e-04
Loss = 1.0920e-01, PNorm = 79.7395, GNorm = 0.7749, lr_0 = 3.2167e-04
Loss = 1.0954e-01, PNorm = 79.7448, GNorm = 0.8345, lr_0 = 3.2145e-04
Loss = 1.1186e-01, PNorm = 79.7487, GNorm = 0.7310, lr_0 = 3.2123e-04
Loss = 9.3120e-02, PNorm = 79.7565, GNorm = 0.5878, lr_0 = 3.2101e-04
Loss = 1.0500e-01, PNorm = 79.7621, GNorm = 0.7061, lr_0 = 3.2079e-04
Loss = 1.2346e-01, PNorm = 79.7666, GNorm = 0.7462, lr_0 = 3.2057e-04
Loss = 9.8282e-02, PNorm = 79.7722, GNorm = 0.6394, lr_0 = 3.2035e-04
Loss = 9.9540e-02, PNorm = 79.7792, GNorm = 0.7570, lr_0 = 3.2013e-04
Loss = 1.1398e-01, PNorm = 79.7883, GNorm = 0.6885, lr_0 = 3.1991e-04
Loss = 1.0698e-01, PNorm = 79.7952, GNorm = 0.7105, lr_0 = 3.1969e-04
Loss = 1.2088e-01, PNorm = 79.8020, GNorm = 1.0662, lr_0 = 3.1947e-04
Loss = 1.0534e-01, PNorm = 79.8070, GNorm = 0.8214, lr_0 = 3.1925e-04
Loss = 1.1641e-01, PNorm = 79.8122, GNorm = 0.6880, lr_0 = 3.1904e-04
Loss = 1.1874e-01, PNorm = 79.8199, GNorm = 0.7261, lr_0 = 3.1882e-04
Loss = 1.0210e-01, PNorm = 79.8281, GNorm = 0.7060, lr_0 = 3.1860e-04
Loss = 1.1395e-01, PNorm = 79.8347, GNorm = 0.7192, lr_0 = 3.1838e-04
Loss = 1.1149e-01, PNorm = 79.8386, GNorm = 1.0452, lr_0 = 3.1816e-04
Loss = 1.0618e-01, PNorm = 79.8452, GNorm = 0.5088, lr_0 = 3.1794e-04
Loss = 1.0814e-01, PNorm = 79.8574, GNorm = 0.8783, lr_0 = 3.1773e-04
Loss = 9.5909e-02, PNorm = 79.8679, GNorm = 0.6071, lr_0 = 3.1751e-04
Loss = 9.9478e-02, PNorm = 79.8771, GNorm = 1.4069, lr_0 = 3.1729e-04
Loss = 1.0948e-01, PNorm = 79.8833, GNorm = 0.8614, lr_0 = 3.1707e-04
Loss = 1.1915e-01, PNorm = 79.8920, GNorm = 0.5649, lr_0 = 3.1686e-04
Loss = 1.1752e-01, PNorm = 79.8990, GNorm = 0.5950, lr_0 = 3.1664e-04
Loss = 1.1682e-01, PNorm = 79.9065, GNorm = 0.7410, lr_0 = 3.1642e-04
Loss = 9.6055e-02, PNorm = 79.9120, GNorm = 0.6519, lr_0 = 3.1621e-04
Validation mae = 0.230361
Epoch 16
Loss = 9.6746e-02, PNorm = 79.9193, GNorm = 0.6309, lr_0 = 3.1599e-04
Loss = 9.2057e-02, PNorm = 79.9261, GNorm = 0.8630, lr_0 = 3.1577e-04
Loss = 9.8251e-02, PNorm = 79.9376, GNorm = 0.5583, lr_0 = 3.1556e-04
Loss = 1.0602e-01, PNorm = 79.9445, GNorm = 0.6068, lr_0 = 3.1534e-04
Loss = 9.3765e-02, PNorm = 79.9503, GNorm = 1.1351, lr_0 = 3.1512e-04
Loss = 9.4412e-02, PNorm = 79.9549, GNorm = 0.4695, lr_0 = 3.1491e-04
Loss = 8.8972e-02, PNorm = 79.9583, GNorm = 0.4474, lr_0 = 3.1469e-04
Loss = 9.6188e-02, PNorm = 79.9637, GNorm = 0.7745, lr_0 = 3.1448e-04
Loss = 1.1048e-01, PNorm = 79.9695, GNorm = 0.7552, lr_0 = 3.1426e-04
Loss = 1.0161e-01, PNorm = 79.9737, GNorm = 0.6096, lr_0 = 3.1405e-04
Loss = 9.9898e-02, PNorm = 79.9820, GNorm = 0.8305, lr_0 = 3.1383e-04
Loss = 1.0810e-01, PNorm = 79.9878, GNorm = 0.6515, lr_0 = 3.1362e-04
Loss = 9.3669e-02, PNorm = 79.9953, GNorm = 0.8014, lr_0 = 3.1340e-04
Loss = 1.1101e-01, PNorm = 80.0042, GNorm = 0.8039, lr_0 = 3.1319e-04
Loss = 1.0153e-01, PNorm = 80.0146, GNorm = 0.7416, lr_0 = 3.1297e-04
Loss = 1.0394e-01, PNorm = 80.0224, GNorm = 0.6831, lr_0 = 3.1276e-04
Loss = 9.5310e-02, PNorm = 80.0277, GNorm = 0.7966, lr_0 = 3.1254e-04
Loss = 9.1310e-02, PNorm = 80.0349, GNorm = 0.5306, lr_0 = 3.1233e-04
Loss = 8.8962e-02, PNorm = 80.0429, GNorm = 0.7721, lr_0 = 3.1212e-04
Loss = 1.1535e-01, PNorm = 80.0547, GNorm = 0.5361, lr_0 = 3.1190e-04
Loss = 1.0596e-01, PNorm = 80.0626, GNorm = 0.6618, lr_0 = 3.1169e-04
Loss = 1.0123e-01, PNorm = 80.0661, GNorm = 0.7750, lr_0 = 3.1147e-04
Loss = 1.0270e-01, PNorm = 80.0721, GNorm = 0.6402, lr_0 = 3.1126e-04
Loss = 1.0920e-01, PNorm = 80.0778, GNorm = 0.6212, lr_0 = 3.1105e-04
Loss = 9.7503e-02, PNorm = 80.0879, GNorm = 0.7359, lr_0 = 3.1083e-04
Loss = 1.1602e-01, PNorm = 80.0965, GNorm = 0.9754, lr_0 = 3.1062e-04
Loss = 9.7502e-02, PNorm = 80.1041, GNorm = 0.4441, lr_0 = 3.1041e-04
Loss = 1.1040e-01, PNorm = 80.1118, GNorm = 0.7192, lr_0 = 3.1020e-04
Loss = 1.1043e-01, PNorm = 80.1204, GNorm = 0.9336, lr_0 = 3.0998e-04
Loss = 9.8832e-02, PNorm = 80.1289, GNorm = 0.9117, lr_0 = 3.0977e-04
Loss = 1.0271e-01, PNorm = 80.1372, GNorm = 0.6838, lr_0 = 3.0956e-04
Loss = 1.1255e-01, PNorm = 80.1457, GNorm = 0.9402, lr_0 = 3.0935e-04
Loss = 1.1558e-01, PNorm = 80.1514, GNorm = 0.5952, lr_0 = 3.0914e-04
Loss = 1.0772e-01, PNorm = 80.1557, GNorm = 0.5586, lr_0 = 3.0892e-04
Loss = 1.0071e-01, PNorm = 80.1621, GNorm = 0.7084, lr_0 = 3.0871e-04
Loss = 9.8331e-02, PNorm = 80.1680, GNorm = 0.5909, lr_0 = 3.0850e-04
Loss = 1.0442e-01, PNorm = 80.1746, GNorm = 0.6203, lr_0 = 3.0829e-04
Loss = 1.0526e-01, PNorm = 80.1841, GNorm = 0.5154, lr_0 = 3.0808e-04
Loss = 1.0782e-01, PNorm = 80.1953, GNorm = 0.7655, lr_0 = 3.0787e-04
Loss = 1.0590e-01, PNorm = 80.2034, GNorm = 0.5927, lr_0 = 3.0766e-04
Loss = 1.0196e-01, PNorm = 80.2087, GNorm = 0.8294, lr_0 = 3.0745e-04
Loss = 9.2764e-02, PNorm = 80.2124, GNorm = 0.4978, lr_0 = 3.0723e-04
Loss = 1.0135e-01, PNorm = 80.2168, GNorm = 0.5563, lr_0 = 3.0702e-04
Loss = 1.1678e-01, PNorm = 80.2208, GNorm = 0.5929, lr_0 = 3.0681e-04
Loss = 9.4285e-02, PNorm = 80.2215, GNorm = 0.7575, lr_0 = 3.0660e-04
Loss = 9.7881e-02, PNorm = 80.2287, GNorm = 0.9971, lr_0 = 3.0639e-04
Loss = 1.0210e-01, PNorm = 80.2319, GNorm = 0.6763, lr_0 = 3.0618e-04
Loss = 9.8698e-02, PNorm = 80.2360, GNorm = 1.0610, lr_0 = 3.0597e-04
Loss = 1.1268e-01, PNorm = 80.2399, GNorm = 0.9682, lr_0 = 3.0576e-04
Loss = 1.0471e-01, PNorm = 80.2483, GNorm = 0.8580, lr_0 = 3.0555e-04
Loss = 1.0507e-01, PNorm = 80.2569, GNorm = 0.7997, lr_0 = 3.0535e-04
Loss = 9.2952e-02, PNorm = 80.2677, GNorm = 0.6905, lr_0 = 3.0514e-04
Loss = 1.1241e-01, PNorm = 80.2740, GNorm = 0.5675, lr_0 = 3.0493e-04
Loss = 1.1353e-01, PNorm = 80.2787, GNorm = 0.5776, lr_0 = 3.0472e-04
Loss = 8.7317e-02, PNorm = 80.2864, GNorm = 0.4368, lr_0 = 3.0451e-04
Loss = 1.0590e-01, PNorm = 80.2945, GNorm = 0.7844, lr_0 = 3.0430e-04
Loss = 9.9025e-02, PNorm = 80.3008, GNorm = 0.5003, lr_0 = 3.0409e-04
Loss = 1.2266e-01, PNorm = 80.3097, GNorm = 0.5700, lr_0 = 3.0388e-04
Loss = 9.8366e-02, PNorm = 80.3172, GNorm = 0.8708, lr_0 = 3.0368e-04
Loss = 1.1495e-01, PNorm = 80.3222, GNorm = 0.5926, lr_0 = 3.0347e-04
Loss = 1.3004e-01, PNorm = 80.3338, GNorm = 1.3690, lr_0 = 3.0326e-04
Loss = 1.0941e-01, PNorm = 80.3441, GNorm = 0.6124, lr_0 = 3.0305e-04
Loss = 1.0582e-01, PNorm = 80.3501, GNorm = 0.6816, lr_0 = 3.0284e-04
Loss = 1.0558e-01, PNorm = 80.3563, GNorm = 0.6262, lr_0 = 3.0264e-04
Loss = 1.0773e-01, PNorm = 80.3613, GNorm = 0.5746, lr_0 = 3.0243e-04
Loss = 9.8700e-02, PNorm = 80.3664, GNorm = 0.6720, lr_0 = 3.0222e-04
Loss = 9.4335e-02, PNorm = 80.3714, GNorm = 0.5956, lr_0 = 3.0202e-04
Loss = 1.0266e-01, PNorm = 80.3754, GNorm = 0.6729, lr_0 = 3.0181e-04
Loss = 1.0320e-01, PNorm = 80.3818, GNorm = 0.6418, lr_0 = 3.0160e-04
Loss = 9.4654e-02, PNorm = 80.3895, GNorm = 0.6209, lr_0 = 3.0140e-04
Loss = 9.9023e-02, PNorm = 80.3951, GNorm = 0.6701, lr_0 = 3.0119e-04
Loss = 8.6616e-02, PNorm = 80.3984, GNorm = 0.5562, lr_0 = 3.0098e-04
Loss = 9.7295e-02, PNorm = 80.4050, GNorm = 0.8173, lr_0 = 3.0078e-04
Loss = 1.0069e-01, PNorm = 80.4078, GNorm = 0.8685, lr_0 = 3.0057e-04
Loss = 1.1435e-01, PNorm = 80.4123, GNorm = 0.8712, lr_0 = 3.0036e-04
Loss = 1.0269e-01, PNorm = 80.4196, GNorm = 0.7895, lr_0 = 3.0016e-04
Loss = 1.0784e-01, PNorm = 80.4263, GNorm = 0.6745, lr_0 = 2.9995e-04
Loss = 1.0496e-01, PNorm = 80.4328, GNorm = 0.5823, lr_0 = 2.9975e-04
Loss = 1.1737e-01, PNorm = 80.4401, GNorm = 0.5681, lr_0 = 2.9954e-04
Loss = 1.1946e-01, PNorm = 80.4494, GNorm = 0.8001, lr_0 = 2.9934e-04
Loss = 1.0940e-01, PNorm = 80.4553, GNorm = 0.6885, lr_0 = 2.9913e-04
Loss = 1.0839e-01, PNorm = 80.4642, GNorm = 0.5559, lr_0 = 2.9893e-04
Loss = 1.0213e-01, PNorm = 80.4709, GNorm = 0.7228, lr_0 = 2.9872e-04
Loss = 8.9224e-02, PNorm = 80.4756, GNorm = 0.6601, lr_0 = 2.9852e-04
Loss = 1.2401e-01, PNorm = 80.4836, GNorm = 0.5718, lr_0 = 2.9831e-04
Loss = 1.2559e-01, PNorm = 80.4901, GNorm = 1.7648, lr_0 = 2.9811e-04
Loss = 1.0252e-01, PNorm = 80.4965, GNorm = 0.9104, lr_0 = 2.9790e-04
Loss = 9.8361e-02, PNorm = 80.5050, GNorm = 0.6574, lr_0 = 2.9770e-04
Loss = 9.7383e-02, PNorm = 80.5135, GNorm = 0.6274, lr_0 = 2.9750e-04
Loss = 1.0209e-01, PNorm = 80.5215, GNorm = 0.6472, lr_0 = 2.9729e-04
Loss = 1.0663e-01, PNorm = 80.5299, GNorm = 1.0679, lr_0 = 2.9709e-04
Loss = 1.0505e-01, PNorm = 80.5371, GNorm = 0.7069, lr_0 = 2.9689e-04
Loss = 9.6704e-02, PNorm = 80.5422, GNorm = 0.6711, lr_0 = 2.9668e-04
Loss = 1.1214e-01, PNorm = 80.5457, GNorm = 0.6977, lr_0 = 2.9648e-04
Loss = 1.2490e-01, PNorm = 80.5512, GNorm = 0.6700, lr_0 = 2.9628e-04
Loss = 9.8368e-02, PNorm = 80.5591, GNorm = 0.8912, lr_0 = 2.9607e-04
Loss = 1.1562e-01, PNorm = 80.5636, GNorm = 0.9994, lr_0 = 2.9587e-04
Loss = 1.0571e-01, PNorm = 80.5678, GNorm = 0.7528, lr_0 = 2.9567e-04
Loss = 1.0789e-01, PNorm = 80.5759, GNorm = 0.5378, lr_0 = 2.9546e-04
Loss = 1.0686e-01, PNorm = 80.5806, GNorm = 0.9198, lr_0 = 2.9526e-04
Loss = 1.0870e-01, PNorm = 80.5828, GNorm = 0.9461, lr_0 = 2.9506e-04
Loss = 1.0258e-01, PNorm = 80.5895, GNorm = 0.6466, lr_0 = 2.9486e-04
Loss = 9.8205e-02, PNorm = 80.5962, GNorm = 0.6374, lr_0 = 2.9466e-04
Loss = 1.0932e-01, PNorm = 80.6037, GNorm = 0.8800, lr_0 = 2.9445e-04
Loss = 1.0567e-01, PNorm = 80.6132, GNorm = 0.9018, lr_0 = 2.9425e-04
Loss = 1.1532e-01, PNorm = 80.6198, GNorm = 0.8655, lr_0 = 2.9405e-04
Loss = 1.0407e-01, PNorm = 80.6233, GNorm = 0.5543, lr_0 = 2.9385e-04
Loss = 9.9764e-02, PNorm = 80.6281, GNorm = 1.0127, lr_0 = 2.9365e-04
Loss = 1.0972e-01, PNorm = 80.6338, GNorm = 0.7537, lr_0 = 2.9345e-04
Loss = 1.1994e-01, PNorm = 80.6420, GNorm = 0.8726, lr_0 = 2.9325e-04
Loss = 1.0259e-01, PNorm = 80.6463, GNorm = 1.0971, lr_0 = 2.9305e-04
Loss = 1.2288e-01, PNorm = 80.6524, GNorm = 0.6861, lr_0 = 2.9284e-04
Loss = 1.0862e-01, PNorm = 80.6580, GNorm = 0.5523, lr_0 = 2.9264e-04
Loss = 1.1849e-01, PNorm = 80.6643, GNorm = 0.8365, lr_0 = 2.9244e-04
Loss = 9.9698e-02, PNorm = 80.6695, GNorm = 0.6254, lr_0 = 2.9224e-04
Loss = 9.2204e-02, PNorm = 80.6759, GNorm = 0.5804, lr_0 = 2.9204e-04
Loss = 9.6877e-02, PNorm = 80.6825, GNorm = 0.6116, lr_0 = 2.9184e-04
Loss = 1.1376e-01, PNorm = 80.6867, GNorm = 0.8760, lr_0 = 2.9164e-04
Loss = 1.0376e-01, PNorm = 80.6903, GNorm = 0.5534, lr_0 = 2.9144e-04
Loss = 1.1851e-01, PNorm = 80.6965, GNorm = 0.5144, lr_0 = 2.9124e-04
Validation mae = 0.228518
Epoch 17
Loss = 1.0240e-01, PNorm = 80.7010, GNorm = 0.7379, lr_0 = 2.9104e-04
Loss = 9.2758e-02, PNorm = 80.7076, GNorm = 0.7067, lr_0 = 2.9084e-04
Loss = 8.1707e-02, PNorm = 80.7128, GNorm = 0.6003, lr_0 = 2.9065e-04
Loss = 9.2518e-02, PNorm = 80.7208, GNorm = 0.5191, lr_0 = 2.9045e-04
Loss = 8.5203e-02, PNorm = 80.7283, GNorm = 0.5580, lr_0 = 2.9025e-04
Loss = 1.1065e-01, PNorm = 80.7313, GNorm = 0.6542, lr_0 = 2.9005e-04
Loss = 9.5417e-02, PNorm = 80.7360, GNorm = 0.5148, lr_0 = 2.8985e-04
Loss = 8.1889e-02, PNorm = 80.7419, GNorm = 0.7757, lr_0 = 2.8965e-04
Loss = 8.5949e-02, PNorm = 80.7454, GNorm = 0.4928, lr_0 = 2.8945e-04
Loss = 1.0299e-01, PNorm = 80.7536, GNorm = 0.5558, lr_0 = 2.8925e-04
Loss = 9.7005e-02, PNorm = 80.7613, GNorm = 0.5987, lr_0 = 2.8906e-04
Loss = 9.5732e-02, PNorm = 80.7677, GNorm = 0.6101, lr_0 = 2.8886e-04
Loss = 9.2168e-02, PNorm = 80.7745, GNorm = 0.6199, lr_0 = 2.8866e-04
Loss = 1.0917e-01, PNorm = 80.7807, GNorm = 0.5656, lr_0 = 2.8846e-04
Loss = 9.8416e-02, PNorm = 80.7861, GNorm = 0.6150, lr_0 = 2.8826e-04
Loss = 8.7896e-02, PNorm = 80.7898, GNorm = 0.6905, lr_0 = 2.8807e-04
Loss = 9.6263e-02, PNorm = 80.7979, GNorm = 0.9348, lr_0 = 2.8787e-04
Loss = 1.0271e-01, PNorm = 80.8051, GNorm = 0.6529, lr_0 = 2.8767e-04
Loss = 9.1089e-02, PNorm = 80.8127, GNorm = 0.7841, lr_0 = 2.8748e-04
Loss = 1.1745e-01, PNorm = 80.8180, GNorm = 1.2275, lr_0 = 2.8728e-04
Loss = 1.0537e-01, PNorm = 80.8212, GNorm = 0.8489, lr_0 = 2.8708e-04
Loss = 9.6339e-02, PNorm = 80.8269, GNorm = 0.9567, lr_0 = 2.8689e-04
Loss = 1.0829e-01, PNorm = 80.8361, GNorm = 1.3017, lr_0 = 2.8669e-04
Loss = 1.1051e-01, PNorm = 80.8430, GNorm = 0.8109, lr_0 = 2.8649e-04
Loss = 9.4212e-02, PNorm = 80.8482, GNorm = 0.5887, lr_0 = 2.8630e-04
Loss = 9.3476e-02, PNorm = 80.8536, GNorm = 0.7607, lr_0 = 2.8610e-04
Loss = 9.4044e-02, PNorm = 80.8633, GNorm = 0.6844, lr_0 = 2.8590e-04
Loss = 8.1406e-02, PNorm = 80.8705, GNorm = 0.5925, lr_0 = 2.8571e-04
Loss = 1.0260e-01, PNorm = 80.8762, GNorm = 0.6268, lr_0 = 2.8551e-04
Loss = 9.7468e-02, PNorm = 80.8827, GNorm = 0.5299, lr_0 = 2.8532e-04
Loss = 9.8116e-02, PNorm = 80.8882, GNorm = 0.7167, lr_0 = 2.8512e-04
Loss = 1.0767e-01, PNorm = 80.8949, GNorm = 0.8981, lr_0 = 2.8493e-04
Loss = 1.0142e-01, PNorm = 80.9005, GNorm = 0.6020, lr_0 = 2.8473e-04
Loss = 1.1529e-01, PNorm = 80.9077, GNorm = 0.4828, lr_0 = 2.8454e-04
Loss = 9.7778e-02, PNorm = 80.9144, GNorm = 0.6839, lr_0 = 2.8434e-04
Loss = 9.3582e-02, PNorm = 80.9174, GNorm = 0.8373, lr_0 = 2.8415e-04
Loss = 9.8196e-02, PNorm = 80.9246, GNorm = 0.6182, lr_0 = 2.8395e-04
Loss = 9.3283e-02, PNorm = 80.9279, GNorm = 0.4734, lr_0 = 2.8376e-04
Loss = 9.9704e-02, PNorm = 80.9322, GNorm = 0.7132, lr_0 = 2.8356e-04
Loss = 1.0417e-01, PNorm = 80.9380, GNorm = 0.6427, lr_0 = 2.8337e-04
Loss = 1.0921e-01, PNorm = 80.9461, GNorm = 0.7121, lr_0 = 2.8317e-04
Loss = 1.0676e-01, PNorm = 80.9522, GNorm = 0.6186, lr_0 = 2.8298e-04
Loss = 9.8937e-02, PNorm = 80.9558, GNorm = 0.8502, lr_0 = 2.8279e-04
Loss = 1.0453e-01, PNorm = 80.9619, GNorm = 0.5985, lr_0 = 2.8259e-04
Loss = 1.0424e-01, PNorm = 80.9697, GNorm = 0.7781, lr_0 = 2.8240e-04
Loss = 1.1424e-01, PNorm = 80.9774, GNorm = 0.8819, lr_0 = 2.8221e-04
Loss = 8.4518e-02, PNorm = 80.9842, GNorm = 0.6805, lr_0 = 2.8201e-04
Loss = 1.0360e-01, PNorm = 80.9917, GNorm = 0.8630, lr_0 = 2.8182e-04
Loss = 9.0752e-02, PNorm = 80.9979, GNorm = 0.7834, lr_0 = 2.8163e-04
Loss = 1.2268e-01, PNorm = 81.0014, GNorm = 0.5807, lr_0 = 2.8143e-04
Loss = 1.2203e-01, PNorm = 81.0085, GNorm = 0.7183, lr_0 = 2.8124e-04
Loss = 1.0870e-01, PNorm = 81.0135, GNorm = 0.7263, lr_0 = 2.8105e-04
Loss = 9.6667e-02, PNorm = 81.0171, GNorm = 0.8052, lr_0 = 2.8085e-04
Loss = 1.0655e-01, PNorm = 81.0223, GNorm = 0.6324, lr_0 = 2.8066e-04
Loss = 1.1543e-01, PNorm = 81.0281, GNorm = 0.6800, lr_0 = 2.8047e-04
Loss = 1.0530e-01, PNorm = 81.0350, GNorm = 0.7714, lr_0 = 2.8028e-04
Loss = 1.0714e-01, PNorm = 81.0431, GNorm = 0.5266, lr_0 = 2.8009e-04
Loss = 1.1393e-01, PNorm = 81.0507, GNorm = 0.5625, lr_0 = 2.7989e-04
Loss = 1.0910e-01, PNorm = 81.0586, GNorm = 0.6703, lr_0 = 2.7970e-04
Loss = 9.6437e-02, PNorm = 81.0636, GNorm = 0.6573, lr_0 = 2.7951e-04
Loss = 8.9404e-02, PNorm = 81.0691, GNorm = 0.5888, lr_0 = 2.7932e-04
Loss = 9.8572e-02, PNorm = 81.0735, GNorm = 0.7079, lr_0 = 2.7913e-04
Loss = 1.0372e-01, PNorm = 81.0816, GNorm = 0.6132, lr_0 = 2.7894e-04
Loss = 1.0230e-01, PNorm = 81.0865, GNorm = 0.9825, lr_0 = 2.7875e-04
Loss = 9.9090e-02, PNorm = 81.0956, GNorm = 0.5640, lr_0 = 2.7855e-04
Loss = 9.7837e-02, PNorm = 81.1051, GNorm = 0.6478, lr_0 = 2.7836e-04
Loss = 8.7402e-02, PNorm = 81.1153, GNorm = 0.6239, lr_0 = 2.7817e-04
Loss = 9.4114e-02, PNorm = 81.1200, GNorm = 0.6335, lr_0 = 2.7798e-04
Loss = 1.2004e-01, PNorm = 81.1256, GNorm = 1.1564, lr_0 = 2.7779e-04
Loss = 8.8569e-02, PNorm = 81.1286, GNorm = 0.5981, lr_0 = 2.7760e-04
Loss = 9.4694e-02, PNorm = 81.1338, GNorm = 0.5714, lr_0 = 2.7741e-04
Loss = 1.1196e-01, PNorm = 81.1376, GNorm = 1.2175, lr_0 = 2.7722e-04
Loss = 1.0433e-01, PNorm = 81.1447, GNorm = 0.7418, lr_0 = 2.7703e-04
Loss = 9.5910e-02, PNorm = 81.1493, GNorm = 0.5023, lr_0 = 2.7684e-04
Loss = 9.6736e-02, PNorm = 81.1567, GNorm = 0.7305, lr_0 = 2.7665e-04
Loss = 1.0082e-01, PNorm = 81.1616, GNorm = 0.7974, lr_0 = 2.7646e-04
Loss = 1.0065e-01, PNorm = 81.1674, GNorm = 0.8991, lr_0 = 2.7627e-04
Loss = 1.1613e-01, PNorm = 81.1721, GNorm = 0.8585, lr_0 = 2.7608e-04
Loss = 1.1221e-01, PNorm = 81.1783, GNorm = 0.9316, lr_0 = 2.7590e-04
Loss = 1.2616e-01, PNorm = 81.1834, GNorm = 1.0437, lr_0 = 2.7571e-04
Loss = 9.2649e-02, PNorm = 81.1887, GNorm = 0.6026, lr_0 = 2.7552e-04
Loss = 1.0802e-01, PNorm = 81.1961, GNorm = 0.7427, lr_0 = 2.7533e-04
Loss = 1.0846e-01, PNorm = 81.2033, GNorm = 0.6458, lr_0 = 2.7514e-04
Loss = 1.0197e-01, PNorm = 81.2060, GNorm = 0.9832, lr_0 = 2.7495e-04
Loss = 9.9748e-02, PNorm = 81.2099, GNorm = 0.6161, lr_0 = 2.7476e-04
Loss = 9.7407e-02, PNorm = 81.2179, GNorm = 0.5404, lr_0 = 2.7457e-04
Loss = 1.0191e-01, PNorm = 81.2228, GNorm = 0.5489, lr_0 = 2.7439e-04
Loss = 1.0042e-01, PNorm = 81.2300, GNorm = 0.7997, lr_0 = 2.7420e-04
Loss = 1.0033e-01, PNorm = 81.2386, GNorm = 0.6549, lr_0 = 2.7401e-04
Loss = 1.0719e-01, PNorm = 81.2457, GNorm = 0.8134, lr_0 = 2.7382e-04
Loss = 1.0215e-01, PNorm = 81.2527, GNorm = 0.8131, lr_0 = 2.7364e-04
Loss = 1.0072e-01, PNorm = 81.2574, GNorm = 0.6472, lr_0 = 2.7345e-04
Loss = 9.0599e-02, PNorm = 81.2629, GNorm = 0.7196, lr_0 = 2.7326e-04
Loss = 1.1006e-01, PNorm = 81.2646, GNorm = 0.5327, lr_0 = 2.7307e-04
Loss = 1.0616e-01, PNorm = 81.2689, GNorm = 0.7319, lr_0 = 2.7289e-04
Loss = 9.5492e-02, PNorm = 81.2743, GNorm = 0.5294, lr_0 = 2.7270e-04
Loss = 1.1209e-01, PNorm = 81.2767, GNorm = 0.7387, lr_0 = 2.7251e-04
Loss = 1.0376e-01, PNorm = 81.2796, GNorm = 0.7848, lr_0 = 2.7233e-04
Loss = 9.5793e-02, PNorm = 81.2883, GNorm = 0.7882, lr_0 = 2.7214e-04
Loss = 1.0066e-01, PNorm = 81.2964, GNorm = 0.8179, lr_0 = 2.7195e-04
Loss = 1.0703e-01, PNorm = 81.3013, GNorm = 0.8508, lr_0 = 2.7177e-04
Loss = 1.0625e-01, PNorm = 81.3078, GNorm = 0.6812, lr_0 = 2.7158e-04
Loss = 1.0404e-01, PNorm = 81.3124, GNorm = 0.7284, lr_0 = 2.7139e-04
Loss = 9.3401e-02, PNorm = 81.3164, GNorm = 0.5785, lr_0 = 2.7121e-04
Loss = 1.0980e-01, PNorm = 81.3208, GNorm = 0.6567, lr_0 = 2.7102e-04
Loss = 9.3098e-02, PNorm = 81.3226, GNorm = 0.7816, lr_0 = 2.7084e-04
Loss = 1.2576e-01, PNorm = 81.3266, GNorm = 0.8968, lr_0 = 2.7065e-04
Loss = 9.0611e-02, PNorm = 81.3291, GNorm = 0.5427, lr_0 = 2.7047e-04
Loss = 1.1395e-01, PNorm = 81.3336, GNorm = 0.5903, lr_0 = 2.7028e-04
Loss = 9.6695e-02, PNorm = 81.3396, GNorm = 0.6373, lr_0 = 2.7010e-04
Loss = 9.1671e-02, PNorm = 81.3451, GNorm = 0.7325, lr_0 = 2.6991e-04
Loss = 1.1441e-01, PNorm = 81.3528, GNorm = 0.7714, lr_0 = 2.6973e-04
Loss = 1.1924e-01, PNorm = 81.3576, GNorm = 0.6311, lr_0 = 2.6954e-04
Loss = 1.1301e-01, PNorm = 81.3649, GNorm = 0.6915, lr_0 = 2.6936e-04
Loss = 1.1324e-01, PNorm = 81.3709, GNorm = 0.6231, lr_0 = 2.6917e-04
Loss = 8.9653e-02, PNorm = 81.3773, GNorm = 0.6011, lr_0 = 2.6899e-04
Loss = 9.4137e-02, PNorm = 81.3841, GNorm = 0.5463, lr_0 = 2.6880e-04
Loss = 1.0241e-01, PNorm = 81.3868, GNorm = 0.4988, lr_0 = 2.6862e-04
Loss = 1.0552e-01, PNorm = 81.3904, GNorm = 0.7927, lr_0 = 2.6844e-04
Loss = 9.4967e-02, PNorm = 81.3943, GNorm = 0.7076, lr_0 = 2.6825e-04
Validation mae = 0.234012
Epoch 18
Loss = 1.0531e-01, PNorm = 81.4009, GNorm = 0.5403, lr_0 = 2.6807e-04
Loss = 8.6669e-02, PNorm = 81.4077, GNorm = 0.5959, lr_0 = 2.6788e-04
Loss = 8.8754e-02, PNorm = 81.4112, GNorm = 0.5717, lr_0 = 2.6770e-04
Loss = 9.9497e-02, PNorm = 81.4168, GNorm = 0.5890, lr_0 = 2.6752e-04
Loss = 9.4510e-02, PNorm = 81.4194, GNorm = 0.7309, lr_0 = 2.6733e-04
Loss = 8.5061e-02, PNorm = 81.4233, GNorm = 0.6243, lr_0 = 2.6715e-04
Loss = 8.8393e-02, PNorm = 81.4269, GNorm = 0.4649, lr_0 = 2.6697e-04
Loss = 9.2776e-02, PNorm = 81.4325, GNorm = 0.6478, lr_0 = 2.6678e-04
Loss = 1.0322e-01, PNorm = 81.4367, GNorm = 0.6324, lr_0 = 2.6660e-04
Loss = 8.9386e-02, PNorm = 81.4412, GNorm = 0.5042, lr_0 = 2.6642e-04
Loss = 9.0374e-02, PNorm = 81.4487, GNorm = 0.7451, lr_0 = 2.6624e-04
Loss = 8.2559e-02, PNorm = 81.4543, GNorm = 0.7766, lr_0 = 2.6605e-04
Loss = 1.1161e-01, PNorm = 81.4600, GNorm = 0.8817, lr_0 = 2.6587e-04
Loss = 1.0342e-01, PNorm = 81.4659, GNorm = 0.8892, lr_0 = 2.6569e-04
Loss = 9.3401e-02, PNorm = 81.4698, GNorm = 0.6533, lr_0 = 2.6551e-04
Loss = 8.8295e-02, PNorm = 81.4758, GNorm = 0.4854, lr_0 = 2.6533e-04
Loss = 9.8707e-02, PNorm = 81.4784, GNorm = 0.8592, lr_0 = 2.6514e-04
Loss = 1.0011e-01, PNorm = 81.4810, GNorm = 1.1377, lr_0 = 2.6496e-04
Loss = 9.4985e-02, PNorm = 81.4881, GNorm = 0.7721, lr_0 = 2.6478e-04
Loss = 1.0709e-01, PNorm = 81.4951, GNorm = 0.8081, lr_0 = 2.6460e-04
Loss = 1.0560e-01, PNorm = 81.5038, GNorm = 0.8065, lr_0 = 2.6442e-04
Loss = 9.2645e-02, PNorm = 81.5140, GNorm = 0.6445, lr_0 = 2.6424e-04
Loss = 9.3400e-02, PNorm = 81.5189, GNorm = 0.5012, lr_0 = 2.6406e-04
Loss = 1.1047e-01, PNorm = 81.5241, GNorm = 0.6028, lr_0 = 2.6388e-04
Loss = 1.1107e-01, PNorm = 81.5290, GNorm = 0.7464, lr_0 = 2.6369e-04
Loss = 1.0050e-01, PNorm = 81.5340, GNorm = 1.2820, lr_0 = 2.6351e-04
Loss = 1.0360e-01, PNorm = 81.5375, GNorm = 0.5811, lr_0 = 2.6333e-04
Loss = 1.0254e-01, PNorm = 81.5411, GNorm = 0.6705, lr_0 = 2.6315e-04
Loss = 1.0113e-01, PNorm = 81.5483, GNorm = 0.7366, lr_0 = 2.6297e-04
Loss = 1.0762e-01, PNorm = 81.5514, GNorm = 0.8998, lr_0 = 2.6279e-04
Loss = 9.1103e-02, PNorm = 81.5578, GNorm = 0.8599, lr_0 = 2.6261e-04
Loss = 8.6812e-02, PNorm = 81.5657, GNorm = 0.5129, lr_0 = 2.6243e-04
Loss = 8.1611e-02, PNorm = 81.5719, GNorm = 0.5353, lr_0 = 2.6225e-04
Loss = 9.4666e-02, PNorm = 81.5742, GNorm = 0.7991, lr_0 = 2.6207e-04
Loss = 1.0183e-01, PNorm = 81.5789, GNorm = 0.7039, lr_0 = 2.6189e-04
Loss = 1.0351e-01, PNorm = 81.5840, GNorm = 0.9645, lr_0 = 2.6171e-04
Loss = 9.9502e-02, PNorm = 81.5912, GNorm = 0.6192, lr_0 = 2.6153e-04
Loss = 1.1648e-01, PNorm = 81.5995, GNorm = 1.0138, lr_0 = 2.6136e-04
Loss = 9.8256e-02, PNorm = 81.6068, GNorm = 1.0103, lr_0 = 2.6118e-04
Loss = 9.3288e-02, PNorm = 81.6124, GNorm = 0.6035, lr_0 = 2.6100e-04
Loss = 1.0534e-01, PNorm = 81.6183, GNorm = 0.7951, lr_0 = 2.6082e-04
Loss = 8.7553e-02, PNorm = 81.6211, GNorm = 0.7100, lr_0 = 2.6064e-04
Loss = 9.7165e-02, PNorm = 81.6264, GNorm = 0.7997, lr_0 = 2.6046e-04
Loss = 8.6420e-02, PNorm = 81.6322, GNorm = 0.5789, lr_0 = 2.6028e-04
Loss = 1.0066e-01, PNorm = 81.6365, GNorm = 0.5910, lr_0 = 2.6011e-04
Loss = 1.0310e-01, PNorm = 81.6401, GNorm = 0.7971, lr_0 = 2.5993e-04
Loss = 9.4254e-02, PNorm = 81.6406, GNorm = 0.5650, lr_0 = 2.5975e-04
Loss = 9.4523e-02, PNorm = 81.6467, GNorm = 0.6304, lr_0 = 2.5957e-04
Loss = 1.0215e-01, PNorm = 81.6528, GNorm = 0.5721, lr_0 = 2.5939e-04
Loss = 9.4801e-02, PNorm = 81.6561, GNorm = 0.7487, lr_0 = 2.5922e-04
Loss = 1.0953e-01, PNorm = 81.6606, GNorm = 0.5821, lr_0 = 2.5904e-04
Loss = 1.0186e-01, PNorm = 81.6679, GNorm = 0.7736, lr_0 = 2.5886e-04
Loss = 9.6869e-02, PNorm = 81.6751, GNorm = 0.6241, lr_0 = 2.5868e-04
Loss = 9.0883e-02, PNorm = 81.6833, GNorm = 0.7472, lr_0 = 2.5851e-04
Loss = 9.8445e-02, PNorm = 81.6913, GNorm = 0.5778, lr_0 = 2.5833e-04
Loss = 8.7455e-02, PNorm = 81.6960, GNorm = 0.7380, lr_0 = 2.5815e-04
Loss = 8.5065e-02, PNorm = 81.7015, GNorm = 0.6211, lr_0 = 2.5797e-04
Loss = 9.5742e-02, PNorm = 81.7044, GNorm = 0.7523, lr_0 = 2.5780e-04
Loss = 1.0299e-01, PNorm = 81.7079, GNorm = 1.0671, lr_0 = 2.5762e-04
Loss = 9.5991e-02, PNorm = 81.7104, GNorm = 0.4890, lr_0 = 2.5745e-04
Loss = 1.0084e-01, PNorm = 81.7148, GNorm = 0.5831, lr_0 = 2.5727e-04
Loss = 9.3255e-02, PNorm = 81.7214, GNorm = 0.5330, lr_0 = 2.5709e-04
Loss = 1.1105e-01, PNorm = 81.7275, GNorm = 0.6762, lr_0 = 2.5692e-04
Loss = 1.0498e-01, PNorm = 81.7345, GNorm = 0.7281, lr_0 = 2.5674e-04
Loss = 9.9016e-02, PNorm = 81.7423, GNorm = 0.5529, lr_0 = 2.5656e-04
Loss = 9.2046e-02, PNorm = 81.7491, GNorm = 0.6929, lr_0 = 2.5639e-04
Loss = 9.3432e-02, PNorm = 81.7511, GNorm = 0.7047, lr_0 = 2.5621e-04
Loss = 1.1249e-01, PNorm = 81.7529, GNorm = 0.5875, lr_0 = 2.5604e-04
Loss = 1.0283e-01, PNorm = 81.7556, GNorm = 0.7902, lr_0 = 2.5586e-04
Loss = 9.1086e-02, PNorm = 81.7592, GNorm = 0.4541, lr_0 = 2.5569e-04
Loss = 1.0931e-01, PNorm = 81.7641, GNorm = 0.7284, lr_0 = 2.5551e-04
Loss = 1.0055e-01, PNorm = 81.7680, GNorm = 0.6979, lr_0 = 2.5534e-04
Loss = 9.8078e-02, PNorm = 81.7740, GNorm = 0.5819, lr_0 = 2.5516e-04
Loss = 9.9286e-02, PNorm = 81.7799, GNorm = 1.1696, lr_0 = 2.5499e-04
Loss = 1.0365e-01, PNorm = 81.7828, GNorm = 0.5708, lr_0 = 2.5481e-04
Loss = 8.3276e-02, PNorm = 81.7885, GNorm = 0.8301, lr_0 = 2.5464e-04
Loss = 1.0989e-01, PNorm = 81.7955, GNorm = 0.7084, lr_0 = 2.5446e-04
Loss = 9.1750e-02, PNorm = 81.7973, GNorm = 0.4440, lr_0 = 2.5429e-04
Loss = 9.7216e-02, PNorm = 81.8032, GNorm = 0.5651, lr_0 = 2.5411e-04
Loss = 8.7752e-02, PNorm = 81.8084, GNorm = 0.4928, lr_0 = 2.5394e-04
Loss = 8.5003e-02, PNorm = 81.8147, GNorm = 0.6541, lr_0 = 2.5377e-04
Loss = 8.9016e-02, PNorm = 81.8202, GNorm = 0.5893, lr_0 = 2.5359e-04
Loss = 9.1133e-02, PNorm = 81.8268, GNorm = 0.7483, lr_0 = 2.5342e-04
Loss = 1.0275e-01, PNorm = 81.8313, GNorm = 0.7919, lr_0 = 2.5325e-04
Loss = 1.0315e-01, PNorm = 81.8340, GNorm = 0.6919, lr_0 = 2.5307e-04
Loss = 1.0860e-01, PNorm = 81.8408, GNorm = 0.7374, lr_0 = 2.5290e-04
Loss = 1.0405e-01, PNorm = 81.8476, GNorm = 0.5386, lr_0 = 2.5273e-04
Loss = 8.9266e-02, PNorm = 81.8530, GNorm = 0.3882, lr_0 = 2.5255e-04
Loss = 9.7122e-02, PNorm = 81.8577, GNorm = 0.6491, lr_0 = 2.5238e-04
Loss = 9.0767e-02, PNorm = 81.8609, GNorm = 0.5531, lr_0 = 2.5221e-04
Loss = 9.1076e-02, PNorm = 81.8641, GNorm = 0.5621, lr_0 = 2.5203e-04
Loss = 9.7482e-02, PNorm = 81.8690, GNorm = 0.5609, lr_0 = 2.5186e-04
Loss = 1.0505e-01, PNorm = 81.8743, GNorm = 0.6979, lr_0 = 2.5169e-04
Loss = 8.9330e-02, PNorm = 81.8800, GNorm = 0.6772, lr_0 = 2.5152e-04
Loss = 8.6784e-02, PNorm = 81.8841, GNorm = 0.5361, lr_0 = 2.5134e-04
Loss = 1.0802e-01, PNorm = 81.8868, GNorm = 0.7763, lr_0 = 2.5117e-04
Loss = 9.1784e-02, PNorm = 81.8920, GNorm = 0.5252, lr_0 = 2.5100e-04
Loss = 1.0221e-01, PNorm = 81.8969, GNorm = 0.6783, lr_0 = 2.5083e-04
Loss = 1.0401e-01, PNorm = 81.9021, GNorm = 0.5435, lr_0 = 2.5066e-04
Loss = 9.0629e-02, PNorm = 81.9069, GNorm = 0.6555, lr_0 = 2.5048e-04
Loss = 9.4196e-02, PNorm = 81.9127, GNorm = 0.7927, lr_0 = 2.5031e-04
Loss = 1.1776e-01, PNorm = 81.9182, GNorm = 0.8933, lr_0 = 2.5014e-04
Loss = 9.0664e-02, PNorm = 81.9221, GNorm = 0.6754, lr_0 = 2.4997e-04
Loss = 1.0474e-01, PNorm = 81.9270, GNorm = 0.8788, lr_0 = 2.4980e-04
Loss = 8.6557e-02, PNorm = 81.9310, GNorm = 0.4936, lr_0 = 2.4963e-04
Loss = 1.1150e-01, PNorm = 81.9380, GNorm = 1.0048, lr_0 = 2.4946e-04
Loss = 1.0865e-01, PNorm = 81.9406, GNorm = 0.9740, lr_0 = 2.4929e-04
Loss = 1.0121e-01, PNorm = 81.9443, GNorm = 0.5680, lr_0 = 2.4911e-04
Loss = 1.0680e-01, PNorm = 81.9466, GNorm = 0.8643, lr_0 = 2.4894e-04
Loss = 9.7428e-02, PNorm = 81.9508, GNorm = 0.4874, lr_0 = 2.4877e-04
Loss = 1.0175e-01, PNorm = 81.9551, GNorm = 0.6396, lr_0 = 2.4860e-04
Loss = 1.0883e-01, PNorm = 81.9600, GNorm = 0.6352, lr_0 = 2.4843e-04
Loss = 1.0166e-01, PNorm = 81.9655, GNorm = 0.7835, lr_0 = 2.4826e-04
Loss = 1.0307e-01, PNorm = 81.9703, GNorm = 0.6715, lr_0 = 2.4809e-04
Loss = 1.0659e-01, PNorm = 81.9763, GNorm = 0.6603, lr_0 = 2.4792e-04
Loss = 9.8939e-02, PNorm = 81.9809, GNorm = 0.5324, lr_0 = 2.4775e-04
Loss = 1.1084e-01, PNorm = 81.9872, GNorm = 0.5861, lr_0 = 2.4758e-04
Loss = 1.0571e-01, PNorm = 81.9928, GNorm = 0.7150, lr_0 = 2.4741e-04
Loss = 9.3834e-02, PNorm = 81.9977, GNorm = 0.5725, lr_0 = 2.4724e-04
Loss = 9.1134e-02, PNorm = 82.0014, GNorm = 0.5088, lr_0 = 2.4707e-04
Validation mae = 0.228194
Epoch 19
Loss = 8.6892e-02, PNorm = 82.0062, GNorm = 0.8186, lr_0 = 2.4690e-04
Loss = 9.3813e-02, PNorm = 82.0100, GNorm = 1.0400, lr_0 = 2.4674e-04
Loss = 9.3107e-02, PNorm = 82.0130, GNorm = 0.5601, lr_0 = 2.4657e-04
Loss = 9.9811e-02, PNorm = 82.0221, GNorm = 0.6327, lr_0 = 2.4640e-04
Loss = 8.7499e-02, PNorm = 82.0299, GNorm = 0.7379, lr_0 = 2.4623e-04
Loss = 9.6308e-02, PNorm = 82.0344, GNorm = 0.5434, lr_0 = 2.4606e-04
Loss = 8.8428e-02, PNorm = 82.0356, GNorm = 0.5909, lr_0 = 2.4589e-04
Loss = 8.9327e-02, PNorm = 82.0417, GNorm = 0.5956, lr_0 = 2.4572e-04
Loss = 8.8515e-02, PNorm = 82.0464, GNorm = 0.6218, lr_0 = 2.4556e-04
Loss = 8.7559e-02, PNorm = 82.0468, GNorm = 0.6638, lr_0 = 2.4539e-04
Loss = 8.1940e-02, PNorm = 82.0503, GNorm = 0.5076, lr_0 = 2.4522e-04
Loss = 8.5137e-02, PNorm = 82.0543, GNorm = 0.5845, lr_0 = 2.4505e-04
Loss = 1.0597e-01, PNorm = 82.0607, GNorm = 0.6215, lr_0 = 2.4488e-04
Loss = 9.2403e-02, PNorm = 82.0665, GNorm = 0.6991, lr_0 = 2.4472e-04
Loss = 8.8480e-02, PNorm = 82.0692, GNorm = 0.4997, lr_0 = 2.4455e-04
Loss = 9.4768e-02, PNorm = 82.0723, GNorm = 0.5044, lr_0 = 2.4438e-04
Loss = 9.6076e-02, PNorm = 82.0811, GNorm = 0.6508, lr_0 = 2.4421e-04
Loss = 1.0098e-01, PNorm = 82.0888, GNorm = 0.6324, lr_0 = 2.4405e-04
Loss = 9.8291e-02, PNorm = 82.0914, GNorm = 0.7694, lr_0 = 2.4388e-04
Loss = 1.0751e-01, PNorm = 82.0978, GNorm = 0.9000, lr_0 = 2.4371e-04
Loss = 9.6287e-02, PNorm = 82.1008, GNorm = 0.6365, lr_0 = 2.4354e-04
Loss = 1.0388e-01, PNorm = 82.1042, GNorm = 0.6190, lr_0 = 2.4338e-04
Loss = 1.0070e-01, PNorm = 82.1101, GNorm = 0.9781, lr_0 = 2.4321e-04
Loss = 9.6766e-02, PNorm = 82.1120, GNorm = 0.5471, lr_0 = 2.4304e-04
Loss = 8.7901e-02, PNorm = 82.1167, GNorm = 0.5901, lr_0 = 2.4288e-04
Loss = 9.4354e-02, PNorm = 82.1238, GNorm = 0.8417, lr_0 = 2.4271e-04
Loss = 1.0006e-01, PNorm = 82.1276, GNorm = 0.7671, lr_0 = 2.4254e-04
Loss = 8.3196e-02, PNorm = 82.1312, GNorm = 0.8132, lr_0 = 2.4238e-04
Loss = 1.0475e-01, PNorm = 82.1354, GNorm = 0.6784, lr_0 = 2.4221e-04
Loss = 9.9785e-02, PNorm = 82.1401, GNorm = 0.7215, lr_0 = 2.4205e-04
Loss = 8.9342e-02, PNorm = 82.1412, GNorm = 0.8410, lr_0 = 2.4188e-04
Loss = 9.9411e-02, PNorm = 82.1441, GNorm = 0.8991, lr_0 = 2.4171e-04
Loss = 9.2729e-02, PNorm = 82.1479, GNorm = 0.7092, lr_0 = 2.4155e-04
Loss = 8.6474e-02, PNorm = 82.1550, GNorm = 0.8536, lr_0 = 2.4138e-04
Loss = 1.0505e-01, PNorm = 82.1613, GNorm = 0.8923, lr_0 = 2.4122e-04
Loss = 1.0074e-01, PNorm = 82.1667, GNorm = 0.6392, lr_0 = 2.4105e-04
Loss = 9.9173e-02, PNorm = 82.1732, GNorm = 0.7871, lr_0 = 2.4089e-04
Loss = 9.8984e-02, PNorm = 82.1806, GNorm = 0.6939, lr_0 = 2.4072e-04
Loss = 9.7322e-02, PNorm = 82.1850, GNorm = 1.0023, lr_0 = 2.4056e-04
Loss = 8.6208e-02, PNorm = 82.1912, GNorm = 0.5047, lr_0 = 2.4039e-04
Loss = 1.0602e-01, PNorm = 82.1944, GNorm = 0.6931, lr_0 = 2.4023e-04
Loss = 1.0528e-01, PNorm = 82.1997, GNorm = 0.7176, lr_0 = 2.4006e-04
Loss = 9.4889e-02, PNorm = 82.2030, GNorm = 0.5770, lr_0 = 2.3990e-04
Loss = 1.0471e-01, PNorm = 82.2077, GNorm = 0.7072, lr_0 = 2.3974e-04
Loss = 9.9791e-02, PNorm = 82.2124, GNorm = 0.6292, lr_0 = 2.3957e-04
Loss = 1.0222e-01, PNorm = 82.2160, GNorm = 0.8064, lr_0 = 2.3941e-04
Loss = 1.0494e-01, PNorm = 82.2213, GNorm = 1.0228, lr_0 = 2.3924e-04
Loss = 8.4216e-02, PNorm = 82.2275, GNorm = 0.6584, lr_0 = 2.3908e-04
Loss = 9.8584e-02, PNorm = 82.2327, GNorm = 0.5919, lr_0 = 2.3892e-04
Loss = 9.2810e-02, PNorm = 82.2342, GNorm = 0.5394, lr_0 = 2.3875e-04
Loss = 9.6572e-02, PNorm = 82.2380, GNorm = 0.5392, lr_0 = 2.3859e-04
Loss = 9.0102e-02, PNorm = 82.2424, GNorm = 0.5280, lr_0 = 2.3842e-04
Loss = 1.0133e-01, PNorm = 82.2471, GNorm = 0.5980, lr_0 = 2.3826e-04
Loss = 1.0354e-01, PNorm = 82.2518, GNorm = 0.7826, lr_0 = 2.3810e-04
Loss = 8.7648e-02, PNorm = 82.2552, GNorm = 0.6162, lr_0 = 2.3794e-04
Loss = 9.0688e-02, PNorm = 82.2578, GNorm = 0.8335, lr_0 = 2.3777e-04
Loss = 8.9988e-02, PNorm = 82.2634, GNorm = 0.5776, lr_0 = 2.3761e-04
Loss = 9.7833e-02, PNorm = 82.2696, GNorm = 0.5381, lr_0 = 2.3745e-04
Loss = 9.4012e-02, PNorm = 82.2761, GNorm = 0.8379, lr_0 = 2.3728e-04
Loss = 8.3995e-02, PNorm = 82.2821, GNorm = 0.6817, lr_0 = 2.3712e-04
Loss = 9.8858e-02, PNorm = 82.2851, GNorm = 0.5757, lr_0 = 2.3696e-04
Loss = 9.2598e-02, PNorm = 82.2900, GNorm = 0.5473, lr_0 = 2.3680e-04
Loss = 9.3682e-02, PNorm = 82.2953, GNorm = 0.5370, lr_0 = 2.3663e-04
Loss = 1.0620e-01, PNorm = 82.2980, GNorm = 0.7509, lr_0 = 2.3647e-04
Loss = 9.7580e-02, PNorm = 82.3000, GNorm = 0.7301, lr_0 = 2.3631e-04
Loss = 9.1187e-02, PNorm = 82.3002, GNorm = 0.7527, lr_0 = 2.3615e-04
Loss = 1.0101e-01, PNorm = 82.3059, GNorm = 0.5943, lr_0 = 2.3599e-04
Loss = 9.7837e-02, PNorm = 82.3125, GNorm = 0.5713, lr_0 = 2.3582e-04
Loss = 9.4570e-02, PNorm = 82.3168, GNorm = 0.7233, lr_0 = 2.3566e-04
Loss = 8.9127e-02, PNorm = 82.3216, GNorm = 0.8225, lr_0 = 2.3550e-04
Loss = 1.0670e-01, PNorm = 82.3233, GNorm = 0.8964, lr_0 = 2.3534e-04
Loss = 7.9441e-02, PNorm = 82.3278, GNorm = 0.5093, lr_0 = 2.3518e-04
Loss = 9.7587e-02, PNorm = 82.3316, GNorm = 0.7886, lr_0 = 2.3502e-04
Loss = 1.0143e-01, PNorm = 82.3378, GNorm = 0.7234, lr_0 = 2.3486e-04
Loss = 1.0174e-01, PNorm = 82.3430, GNorm = 0.7610, lr_0 = 2.3470e-04
Loss = 9.2758e-02, PNorm = 82.3473, GNorm = 0.5474, lr_0 = 2.3454e-04
Loss = 1.0223e-01, PNorm = 82.3505, GNorm = 0.6867, lr_0 = 2.3437e-04
Loss = 9.0697e-02, PNorm = 82.3542, GNorm = 0.4913, lr_0 = 2.3421e-04
Loss = 1.0036e-01, PNorm = 82.3578, GNorm = 0.5298, lr_0 = 2.3405e-04
Loss = 1.0283e-01, PNorm = 82.3609, GNorm = 0.5281, lr_0 = 2.3389e-04
Loss = 9.0834e-02, PNorm = 82.3661, GNorm = 0.6981, lr_0 = 2.3373e-04
Loss = 9.1976e-02, PNorm = 82.3697, GNorm = 0.8224, lr_0 = 2.3357e-04
Loss = 9.0887e-02, PNorm = 82.3730, GNorm = 0.6124, lr_0 = 2.3341e-04
Loss = 9.8788e-02, PNorm = 82.3771, GNorm = 0.6223, lr_0 = 2.3325e-04
Loss = 9.8978e-02, PNorm = 82.3829, GNorm = 0.5790, lr_0 = 2.3309e-04
Loss = 9.4995e-02, PNorm = 82.3897, GNorm = 0.8274, lr_0 = 2.3293e-04
Loss = 8.2489e-02, PNorm = 82.3950, GNorm = 0.7057, lr_0 = 2.3277e-04
Loss = 8.3761e-02, PNorm = 82.3993, GNorm = 0.9460, lr_0 = 2.3261e-04
Loss = 1.1177e-01, PNorm = 82.4025, GNorm = 0.6905, lr_0 = 2.3246e-04
Loss = 8.4046e-02, PNorm = 82.4080, GNorm = 0.5665, lr_0 = 2.3230e-04
Loss = 1.0131e-01, PNorm = 82.4152, GNorm = 0.9296, lr_0 = 2.3214e-04
Loss = 9.3835e-02, PNorm = 82.4192, GNorm = 0.5828, lr_0 = 2.3198e-04
Loss = 1.0309e-01, PNorm = 82.4230, GNorm = 0.6829, lr_0 = 2.3182e-04
Loss = 8.1983e-02, PNorm = 82.4285, GNorm = 0.6494, lr_0 = 2.3166e-04
Loss = 8.9253e-02, PNorm = 82.4321, GNorm = 1.0151, lr_0 = 2.3150e-04
Loss = 9.2351e-02, PNorm = 82.4339, GNorm = 0.8339, lr_0 = 2.3134e-04
Loss = 9.2311e-02, PNorm = 82.4372, GNorm = 1.0499, lr_0 = 2.3118e-04
Loss = 8.9914e-02, PNorm = 82.4444, GNorm = 0.9739, lr_0 = 2.3103e-04
Loss = 9.3229e-02, PNorm = 82.4490, GNorm = 0.6852, lr_0 = 2.3087e-04
Loss = 1.0236e-01, PNorm = 82.4544, GNorm = 0.8525, lr_0 = 2.3071e-04
Loss = 8.6340e-02, PNorm = 82.4577, GNorm = 0.6495, lr_0 = 2.3055e-04
Loss = 9.1860e-02, PNorm = 82.4642, GNorm = 0.6332, lr_0 = 2.3039e-04
Loss = 8.1704e-02, PNorm = 82.4686, GNorm = 0.5573, lr_0 = 2.3024e-04
Loss = 9.7155e-02, PNorm = 82.4734, GNorm = 0.5587, lr_0 = 2.3008e-04
Loss = 1.0025e-01, PNorm = 82.4793, GNorm = 0.5795, lr_0 = 2.2992e-04
Loss = 9.8574e-02, PNorm = 82.4810, GNorm = 0.7920, lr_0 = 2.2976e-04
Loss = 9.7453e-02, PNorm = 82.4822, GNorm = 0.7339, lr_0 = 2.2961e-04
Loss = 1.0810e-01, PNorm = 82.4870, GNorm = 0.7648, lr_0 = 2.2945e-04
Loss = 1.0058e-01, PNorm = 82.4910, GNorm = 0.8111, lr_0 = 2.2929e-04
Loss = 8.6487e-02, PNorm = 82.4922, GNorm = 0.7288, lr_0 = 2.2913e-04
Loss = 9.7869e-02, PNorm = 82.4953, GNorm = 0.8287, lr_0 = 2.2898e-04
Loss = 1.0084e-01, PNorm = 82.4960, GNorm = 0.5439, lr_0 = 2.2882e-04
Loss = 8.9984e-02, PNorm = 82.4975, GNorm = 0.9812, lr_0 = 2.2866e-04
Loss = 8.4403e-02, PNorm = 82.5000, GNorm = 0.7335, lr_0 = 2.2851e-04
Loss = 9.4455e-02, PNorm = 82.5039, GNorm = 0.6760, lr_0 = 2.2835e-04
Loss = 1.0000e-01, PNorm = 82.5043, GNorm = 0.8241, lr_0 = 2.2819e-04
Loss = 9.5574e-02, PNorm = 82.5101, GNorm = 0.6023, lr_0 = 2.2804e-04
Loss = 8.3578e-02, PNorm = 82.5184, GNorm = 0.5636, lr_0 = 2.2788e-04
Loss = 8.7064e-02, PNorm = 82.5229, GNorm = 0.4854, lr_0 = 2.2773e-04
Loss = 9.6758e-02, PNorm = 82.5250, GNorm = 0.5291, lr_0 = 2.2757e-04
Validation mae = 0.228769
Epoch 20
Loss = 9.4520e-02, PNorm = 82.5302, GNorm = 1.0474, lr_0 = 2.2741e-04
Loss = 9.6393e-02, PNorm = 82.5327, GNorm = 0.5221, lr_0 = 2.2726e-04
Loss = 8.7483e-02, PNorm = 82.5361, GNorm = 0.8182, lr_0 = 2.2710e-04
Loss = 9.3224e-02, PNorm = 82.5428, GNorm = 0.5791, lr_0 = 2.2695e-04
Loss = 7.8372e-02, PNorm = 82.5444, GNorm = 0.5705, lr_0 = 2.2679e-04
Loss = 9.3466e-02, PNorm = 82.5485, GNorm = 0.5743, lr_0 = 2.2664e-04
Loss = 8.2620e-02, PNorm = 82.5531, GNorm = 0.4212, lr_0 = 2.2648e-04
Loss = 8.7269e-02, PNorm = 82.5579, GNorm = 0.4761, lr_0 = 2.2632e-04
Loss = 1.0578e-01, PNorm = 82.5618, GNorm = 0.7643, lr_0 = 2.2617e-04
Loss = 8.7284e-02, PNorm = 82.5647, GNorm = 0.7644, lr_0 = 2.2601e-04
Loss = 8.5105e-02, PNorm = 82.5702, GNorm = 0.6525, lr_0 = 2.2586e-04
Loss = 8.4846e-02, PNorm = 82.5755, GNorm = 0.5384, lr_0 = 2.2571e-04
Loss = 9.1266e-02, PNorm = 82.5789, GNorm = 0.8918, lr_0 = 2.2555e-04
Loss = 9.3639e-02, PNorm = 82.5821, GNorm = 0.6748, lr_0 = 2.2540e-04
Loss = 9.2292e-02, PNorm = 82.5877, GNorm = 0.7809, lr_0 = 2.2524e-04
Loss = 1.0447e-01, PNorm = 82.5895, GNorm = 0.8011, lr_0 = 2.2509e-04
Loss = 1.0662e-01, PNorm = 82.5915, GNorm = 0.8757, lr_0 = 2.2493e-04
Loss = 7.9931e-02, PNorm = 82.5941, GNorm = 0.5063, lr_0 = 2.2478e-04
Loss = 1.0247e-01, PNorm = 82.5958, GNorm = 0.9445, lr_0 = 2.2463e-04
Loss = 9.0141e-02, PNorm = 82.6005, GNorm = 0.5715, lr_0 = 2.2447e-04
Loss = 8.8347e-02, PNorm = 82.6047, GNorm = 0.5920, lr_0 = 2.2432e-04
Loss = 9.6762e-02, PNorm = 82.6076, GNorm = 0.6198, lr_0 = 2.2416e-04
Loss = 8.0466e-02, PNorm = 82.6127, GNorm = 0.4659, lr_0 = 2.2401e-04
Loss = 9.3022e-02, PNorm = 82.6188, GNorm = 0.9961, lr_0 = 2.2386e-04
Loss = 8.6605e-02, PNorm = 82.6247, GNorm = 0.6679, lr_0 = 2.2370e-04
Loss = 9.6785e-02, PNorm = 82.6290, GNorm = 0.6222, lr_0 = 2.2355e-04
Loss = 9.2612e-02, PNorm = 82.6332, GNorm = 0.6443, lr_0 = 2.2340e-04
Loss = 9.2281e-02, PNorm = 82.6352, GNorm = 0.6471, lr_0 = 2.2324e-04
Loss = 9.7392e-02, PNorm = 82.6366, GNorm = 0.7885, lr_0 = 2.2309e-04
Loss = 1.0383e-01, PNorm = 82.6418, GNorm = 0.5301, lr_0 = 2.2294e-04
Loss = 9.8808e-02, PNorm = 82.6468, GNorm = 0.6756, lr_0 = 2.2279e-04
Loss = 1.0365e-01, PNorm = 82.6504, GNorm = 0.6779, lr_0 = 2.2263e-04
Loss = 9.2295e-02, PNorm = 82.6568, GNorm = 0.6387, lr_0 = 2.2248e-04
Loss = 8.7639e-02, PNorm = 82.6617, GNorm = 0.6502, lr_0 = 2.2233e-04
Loss = 9.8315e-02, PNorm = 82.6676, GNorm = 1.0310, lr_0 = 2.2218e-04
Loss = 9.3267e-02, PNorm = 82.6754, GNorm = 0.7340, lr_0 = 2.2202e-04
Loss = 9.9892e-02, PNorm = 82.6779, GNorm = 0.5961, lr_0 = 2.2187e-04
Loss = 9.3538e-02, PNorm = 82.6831, GNorm = 0.6263, lr_0 = 2.2172e-04
Loss = 9.4733e-02, PNorm = 82.6876, GNorm = 0.9353, lr_0 = 2.2157e-04
Loss = 9.9116e-02, PNorm = 82.6871, GNorm = 0.6003, lr_0 = 2.2142e-04
Loss = 1.0112e-01, PNorm = 82.6926, GNorm = 0.7097, lr_0 = 2.2126e-04
Loss = 8.1802e-02, PNorm = 82.6968, GNorm = 0.5238, lr_0 = 2.2111e-04
Loss = 8.9607e-02, PNorm = 82.6995, GNorm = 0.5243, lr_0 = 2.2096e-04
Loss = 9.8174e-02, PNorm = 82.7041, GNorm = 0.5237, lr_0 = 2.2081e-04
Loss = 9.2975e-02, PNorm = 82.7094, GNorm = 0.9095, lr_0 = 2.2066e-04
Loss = 8.6373e-02, PNorm = 82.7118, GNorm = 0.8365, lr_0 = 2.2051e-04
Loss = 9.2690e-02, PNorm = 82.7149, GNorm = 0.6513, lr_0 = 2.2036e-04
Loss = 9.5160e-02, PNorm = 82.7177, GNorm = 0.5896, lr_0 = 2.2021e-04
Loss = 9.0970e-02, PNorm = 82.7234, GNorm = 0.9226, lr_0 = 2.2005e-04
Loss = 8.1074e-02, PNorm = 82.7286, GNorm = 0.7780, lr_0 = 2.1990e-04
Loss = 1.0627e-01, PNorm = 82.7346, GNorm = 0.6746, lr_0 = 2.1975e-04
Loss = 9.4640e-02, PNorm = 82.7398, GNorm = 0.7894, lr_0 = 2.1960e-04
Loss = 8.9564e-02, PNorm = 82.7441, GNorm = 0.8610, lr_0 = 2.1945e-04
Loss = 9.4869e-02, PNorm = 82.7475, GNorm = 1.1247, lr_0 = 2.1930e-04
Loss = 8.8609e-02, PNorm = 82.7482, GNorm = 0.6167, lr_0 = 2.1915e-04
Loss = 9.5485e-02, PNorm = 82.7528, GNorm = 0.6964, lr_0 = 2.1900e-04
Loss = 9.0544e-02, PNorm = 82.7571, GNorm = 0.5616, lr_0 = 2.1885e-04
Loss = 1.0179e-01, PNorm = 82.7621, GNorm = 1.1843, lr_0 = 2.1870e-04
Loss = 9.2047e-02, PNorm = 82.7675, GNorm = 0.5324, lr_0 = 2.1855e-04
Loss = 8.5487e-02, PNorm = 82.7715, GNorm = 0.6428, lr_0 = 2.1840e-04
Loss = 8.9381e-02, PNorm = 82.7768, GNorm = 0.6752, lr_0 = 2.1825e-04
Loss = 8.3516e-02, PNorm = 82.7831, GNorm = 0.4766, lr_0 = 2.1810e-04
Loss = 9.5793e-02, PNorm = 82.7880, GNorm = 0.7049, lr_0 = 2.1795e-04
Loss = 9.4078e-02, PNorm = 82.7931, GNorm = 0.7660, lr_0 = 2.1780e-04
Loss = 8.8228e-02, PNorm = 82.7947, GNorm = 0.8388, lr_0 = 2.1765e-04
Loss = 8.6878e-02, PNorm = 82.7973, GNorm = 0.7164, lr_0 = 2.1751e-04
Loss = 8.8125e-02, PNorm = 82.8020, GNorm = 0.7180, lr_0 = 2.1736e-04
Loss = 8.5094e-02, PNorm = 82.8060, GNorm = 0.6142, lr_0 = 2.1721e-04
Loss = 8.6808e-02, PNorm = 82.8113, GNorm = 0.6526, lr_0 = 2.1706e-04
Loss = 8.0437e-02, PNorm = 82.8136, GNorm = 0.7481, lr_0 = 2.1691e-04
Loss = 8.6919e-02, PNorm = 82.8153, GNorm = 0.8326, lr_0 = 2.1676e-04
Loss = 8.2092e-02, PNorm = 82.8184, GNorm = 0.6007, lr_0 = 2.1661e-04
Loss = 8.4431e-02, PNorm = 82.8227, GNorm = 0.5424, lr_0 = 2.1646e-04
Loss = 9.5732e-02, PNorm = 82.8250, GNorm = 0.6717, lr_0 = 2.1632e-04
Loss = 1.0439e-01, PNorm = 82.8256, GNorm = 0.5926, lr_0 = 2.1617e-04
Loss = 9.2565e-02, PNorm = 82.8282, GNorm = 0.7557, lr_0 = 2.1602e-04
Loss = 8.6576e-02, PNorm = 82.8307, GNorm = 0.6054, lr_0 = 2.1587e-04
Loss = 8.6713e-02, PNorm = 82.8346, GNorm = 0.6506, lr_0 = 2.1572e-04
Loss = 1.0313e-01, PNorm = 82.8395, GNorm = 0.5447, lr_0 = 2.1558e-04
Loss = 8.6920e-02, PNorm = 82.8427, GNorm = 0.5794, lr_0 = 2.1543e-04
Loss = 1.0562e-01, PNorm = 82.8492, GNorm = 0.5789, lr_0 = 2.1528e-04
Loss = 9.3973e-02, PNorm = 82.8539, GNorm = 0.5803, lr_0 = 2.1513e-04
Loss = 9.0563e-02, PNorm = 82.8568, GNorm = 0.5724, lr_0 = 2.1499e-04
Loss = 9.3785e-02, PNorm = 82.8614, GNorm = 0.7431, lr_0 = 2.1484e-04
Loss = 8.8511e-02, PNorm = 82.8647, GNorm = 0.6583, lr_0 = 2.1469e-04
Loss = 8.9415e-02, PNorm = 82.8675, GNorm = 0.7086, lr_0 = 2.1454e-04
Loss = 9.8104e-02, PNorm = 82.8715, GNorm = 0.5767, lr_0 = 2.1440e-04
Loss = 9.2092e-02, PNorm = 82.8755, GNorm = 0.6810, lr_0 = 2.1425e-04
Loss = 8.7225e-02, PNorm = 82.8785, GNorm = 0.7105, lr_0 = 2.1410e-04
Loss = 9.0289e-02, PNorm = 82.8803, GNorm = 0.7675, lr_0 = 2.1396e-04
Loss = 8.2743e-02, PNorm = 82.8837, GNorm = 0.6519, lr_0 = 2.1381e-04
Loss = 9.3987e-02, PNorm = 82.8856, GNorm = 0.8260, lr_0 = 2.1366e-04
Loss = 8.4665e-02, PNorm = 82.8884, GNorm = 0.7322, lr_0 = 2.1352e-04
Loss = 8.7660e-02, PNorm = 82.8915, GNorm = 0.9060, lr_0 = 2.1337e-04
Loss = 1.0262e-01, PNorm = 82.8968, GNorm = 0.8007, lr_0 = 2.1323e-04
Loss = 9.7894e-02, PNorm = 82.9021, GNorm = 0.5533, lr_0 = 2.1308e-04
Loss = 9.7498e-02, PNorm = 82.9076, GNorm = 0.5155, lr_0 = 2.1293e-04
Loss = 9.6095e-02, PNorm = 82.9113, GNorm = 0.7204, lr_0 = 2.1279e-04
Loss = 8.3935e-02, PNorm = 82.9154, GNorm = 0.6041, lr_0 = 2.1264e-04
Loss = 9.7538e-02, PNorm = 82.9190, GNorm = 0.6322, lr_0 = 2.1250e-04
Loss = 9.4095e-02, PNorm = 82.9245, GNorm = 0.9953, lr_0 = 2.1235e-04
Loss = 8.9803e-02, PNorm = 82.9302, GNorm = 0.5594, lr_0 = 2.1221e-04
Loss = 8.8231e-02, PNorm = 82.9369, GNorm = 0.5919, lr_0 = 2.1206e-04
Loss = 1.0128e-01, PNorm = 82.9394, GNorm = 0.5519, lr_0 = 2.1191e-04
Loss = 9.1852e-02, PNorm = 82.9440, GNorm = 0.4655, lr_0 = 2.1177e-04
Loss = 9.0186e-02, PNorm = 82.9486, GNorm = 0.6105, lr_0 = 2.1162e-04
Loss = 9.6244e-02, PNorm = 82.9505, GNorm = 0.6285, lr_0 = 2.1148e-04
Loss = 8.9467e-02, PNorm = 82.9543, GNorm = 0.5824, lr_0 = 2.1133e-04
Loss = 8.9799e-02, PNorm = 82.9609, GNorm = 0.6824, lr_0 = 2.1119e-04
Loss = 8.3902e-02, PNorm = 82.9630, GNorm = 0.6362, lr_0 = 2.1104e-04
Loss = 7.8381e-02, PNorm = 82.9651, GNorm = 0.5818, lr_0 = 2.1090e-04
Loss = 9.1067e-02, PNorm = 82.9699, GNorm = 0.6579, lr_0 = 2.1076e-04
Loss = 8.2961e-02, PNorm = 82.9745, GNorm = 0.5545, lr_0 = 2.1061e-04
Loss = 8.8561e-02, PNorm = 82.9795, GNorm = 0.7822, lr_0 = 2.1047e-04
Loss = 1.0184e-01, PNorm = 82.9834, GNorm = 0.7396, lr_0 = 2.1032e-04
Loss = 9.8988e-02, PNorm = 82.9850, GNorm = 0.6620, lr_0 = 2.1018e-04
Loss = 9.2247e-02, PNorm = 82.9898, GNorm = 0.6409, lr_0 = 2.1003e-04
Loss = 9.0478e-02, PNorm = 82.9913, GNorm = 0.6383, lr_0 = 2.0989e-04
Loss = 9.7985e-02, PNorm = 82.9940, GNorm = 0.5781, lr_0 = 2.0975e-04
Loss = 1.0617e-01, PNorm = 83.0001, GNorm = 1.0498, lr_0 = 2.0960e-04
Validation mae = 0.232833
Epoch 21
Loss = 8.4326e-02, PNorm = 83.0048, GNorm = 0.6778, lr_0 = 2.0946e-04
Loss = 8.4342e-02, PNorm = 83.0085, GNorm = 0.6399, lr_0 = 2.0932e-04
Loss = 8.0155e-02, PNorm = 83.0124, GNorm = 0.5764, lr_0 = 2.0917e-04
Loss = 8.5956e-02, PNorm = 83.0147, GNorm = 0.5587, lr_0 = 2.0903e-04
Loss = 8.1369e-02, PNorm = 83.0181, GNorm = 0.7322, lr_0 = 2.0889e-04
Loss = 9.2714e-02, PNorm = 83.0217, GNorm = 0.6620, lr_0 = 2.0874e-04
Loss = 8.2622e-02, PNorm = 83.0247, GNorm = 0.6839, lr_0 = 2.0860e-04
Loss = 8.7401e-02, PNorm = 83.0306, GNorm = 0.5765, lr_0 = 2.0846e-04
Loss = 8.7769e-02, PNorm = 83.0365, GNorm = 0.6088, lr_0 = 2.0831e-04
Loss = 8.7048e-02, PNorm = 83.0419, GNorm = 0.7790, lr_0 = 2.0817e-04
Loss = 8.1446e-02, PNorm = 83.0448, GNorm = 0.4533, lr_0 = 2.0803e-04
Loss = 9.4683e-02, PNorm = 83.0497, GNorm = 0.6863, lr_0 = 2.0789e-04
Loss = 8.9789e-02, PNorm = 83.0554, GNorm = 0.6746, lr_0 = 2.0774e-04
Loss = 7.8657e-02, PNorm = 83.0585, GNorm = 0.5460, lr_0 = 2.0760e-04
Loss = 9.4728e-02, PNorm = 83.0617, GNorm = 0.5593, lr_0 = 2.0746e-04
Loss = 8.4614e-02, PNorm = 83.0652, GNorm = 0.6098, lr_0 = 2.0732e-04
Loss = 9.6009e-02, PNorm = 83.0666, GNorm = 0.7579, lr_0 = 2.0718e-04
Loss = 9.0543e-02, PNorm = 83.0694, GNorm = 0.6641, lr_0 = 2.0703e-04
Loss = 1.0155e-01, PNorm = 83.0733, GNorm = 0.7255, lr_0 = 2.0689e-04
Loss = 9.6747e-02, PNorm = 83.0754, GNorm = 0.6285, lr_0 = 2.0675e-04
Loss = 7.7129e-02, PNorm = 83.0780, GNorm = 0.6618, lr_0 = 2.0661e-04
Loss = 1.0772e-01, PNorm = 83.0855, GNorm = 0.5482, lr_0 = 2.0647e-04
Loss = 9.5005e-02, PNorm = 83.0887, GNorm = 0.5720, lr_0 = 2.0633e-04
Loss = 8.8088e-02, PNorm = 83.0917, GNorm = 0.6194, lr_0 = 2.0618e-04
Loss = 8.7553e-02, PNorm = 83.0945, GNorm = 0.8757, lr_0 = 2.0604e-04
Loss = 9.8916e-02, PNorm = 83.0984, GNorm = 0.6585, lr_0 = 2.0590e-04
Loss = 1.0150e-01, PNorm = 83.1035, GNorm = 0.7516, lr_0 = 2.0576e-04
Loss = 8.1490e-02, PNorm = 83.1082, GNorm = 0.5739, lr_0 = 2.0562e-04
Loss = 1.0263e-01, PNorm = 83.1106, GNorm = 0.5945, lr_0 = 2.0548e-04
Loss = 8.6818e-02, PNorm = 83.1122, GNorm = 0.6743, lr_0 = 2.0534e-04
Loss = 7.8183e-02, PNorm = 83.1140, GNorm = 0.5704, lr_0 = 2.0520e-04
Loss = 8.7826e-02, PNorm = 83.1167, GNorm = 0.5534, lr_0 = 2.0506e-04
Loss = 8.6567e-02, PNorm = 83.1188, GNorm = 0.5233, lr_0 = 2.0492e-04
Loss = 1.0067e-01, PNorm = 83.1255, GNorm = 0.5681, lr_0 = 2.0478e-04
Loss = 9.7126e-02, PNorm = 83.1329, GNorm = 0.6363, lr_0 = 2.0464e-04
Loss = 8.7327e-02, PNorm = 83.1371, GNorm = 0.6955, lr_0 = 2.0450e-04
Loss = 9.6968e-02, PNorm = 83.1406, GNorm = 0.5841, lr_0 = 2.0436e-04
Loss = 8.8230e-02, PNorm = 83.1448, GNorm = 0.6095, lr_0 = 2.0422e-04
Loss = 8.7844e-02, PNorm = 83.1474, GNorm = 0.6037, lr_0 = 2.0408e-04
Loss = 9.7379e-02, PNorm = 83.1518, GNorm = 0.8276, lr_0 = 2.0394e-04
Loss = 9.0430e-02, PNorm = 83.1595, GNorm = 0.6558, lr_0 = 2.0380e-04
Loss = 9.2003e-02, PNorm = 83.1636, GNorm = 0.5273, lr_0 = 2.0366e-04
Loss = 8.5132e-02, PNorm = 83.1679, GNorm = 0.7355, lr_0 = 2.0352e-04
Loss = 8.6483e-02, PNorm = 83.1704, GNorm = 0.5948, lr_0 = 2.0338e-04
Loss = 8.3315e-02, PNorm = 83.1732, GNorm = 0.5913, lr_0 = 2.0324e-04
Loss = 1.0133e-01, PNorm = 83.1758, GNorm = 0.6122, lr_0 = 2.0310e-04
Loss = 8.4342e-02, PNorm = 83.1778, GNorm = 0.5963, lr_0 = 2.0296e-04
Loss = 8.2037e-02, PNorm = 83.1796, GNorm = 0.5802, lr_0 = 2.0282e-04
Loss = 9.0858e-02, PNorm = 83.1833, GNorm = 0.5979, lr_0 = 2.0268e-04
Loss = 8.5360e-02, PNorm = 83.1872, GNorm = 0.7050, lr_0 = 2.0254e-04
Loss = 9.0036e-02, PNorm = 83.1898, GNorm = 0.6521, lr_0 = 2.0240e-04
Loss = 7.3834e-02, PNorm = 83.1952, GNorm = 0.5793, lr_0 = 2.0227e-04
Loss = 7.9186e-02, PNorm = 83.1998, GNorm = 0.5450, lr_0 = 2.0213e-04
Loss = 8.6045e-02, PNorm = 83.2042, GNorm = 0.3978, lr_0 = 2.0199e-04
Loss = 8.0224e-02, PNorm = 83.2090, GNorm = 0.5687, lr_0 = 2.0185e-04
Loss = 9.0201e-02, PNorm = 83.2137, GNorm = 1.0237, lr_0 = 2.0171e-04
Loss = 9.6026e-02, PNorm = 83.2161, GNorm = 0.8778, lr_0 = 2.0157e-04
Loss = 7.8590e-02, PNorm = 83.2196, GNorm = 0.5986, lr_0 = 2.0144e-04
Loss = 9.3068e-02, PNorm = 83.2235, GNorm = 0.6330, lr_0 = 2.0130e-04
Loss = 9.4455e-02, PNorm = 83.2248, GNorm = 0.6095, lr_0 = 2.0116e-04
Loss = 1.0022e-01, PNorm = 83.2288, GNorm = 0.6703, lr_0 = 2.0102e-04
Loss = 9.5926e-02, PNorm = 83.2325, GNorm = 0.8856, lr_0 = 2.0088e-04
Loss = 8.5647e-02, PNorm = 83.2349, GNorm = 0.5630, lr_0 = 2.0075e-04
Loss = 8.4904e-02, PNorm = 83.2395, GNorm = 0.6220, lr_0 = 2.0061e-04
Loss = 8.0147e-02, PNorm = 83.2432, GNorm = 0.7463, lr_0 = 2.0047e-04
Loss = 7.8967e-02, PNorm = 83.2451, GNorm = 0.8606, lr_0 = 2.0033e-04
Loss = 8.7451e-02, PNorm = 83.2480, GNorm = 0.5155, lr_0 = 2.0020e-04
Loss = 8.9642e-02, PNorm = 83.2519, GNorm = 0.5813, lr_0 = 2.0006e-04
Loss = 9.5923e-02, PNorm = 83.2565, GNorm = 0.5448, lr_0 = 1.9992e-04
Loss = 9.3384e-02, PNorm = 83.2601, GNorm = 0.6457, lr_0 = 1.9979e-04
Loss = 8.8905e-02, PNorm = 83.2626, GNorm = 0.7060, lr_0 = 1.9965e-04
Loss = 9.1154e-02, PNorm = 83.2679, GNorm = 0.7135, lr_0 = 1.9951e-04
Loss = 9.0245e-02, PNorm = 83.2717, GNorm = 0.8252, lr_0 = 1.9938e-04
Loss = 9.1865e-02, PNorm = 83.2738, GNorm = 0.6430, lr_0 = 1.9924e-04
Loss = 8.8518e-02, PNorm = 83.2783, GNorm = 0.5152, lr_0 = 1.9910e-04
Loss = 8.7157e-02, PNorm = 83.2815, GNorm = 0.6468, lr_0 = 1.9897e-04
Loss = 1.0061e-01, PNorm = 83.2838, GNorm = 0.6313, lr_0 = 1.9883e-04
Loss = 1.0478e-01, PNorm = 83.2897, GNorm = 0.5598, lr_0 = 1.9869e-04
Loss = 9.8878e-02, PNorm = 83.2944, GNorm = 0.7003, lr_0 = 1.9856e-04
Loss = 8.8149e-02, PNorm = 83.2969, GNorm = 0.7161, lr_0 = 1.9842e-04
Loss = 9.0022e-02, PNorm = 83.3008, GNorm = 1.2109, lr_0 = 1.9829e-04
Loss = 9.9529e-02, PNorm = 83.3051, GNorm = 0.7469, lr_0 = 1.9815e-04
Loss = 1.1026e-01, PNorm = 83.3096, GNorm = 0.6494, lr_0 = 1.9801e-04
Loss = 8.7104e-02, PNorm = 83.3156, GNorm = 0.6278, lr_0 = 1.9788e-04
Loss = 9.5822e-02, PNorm = 83.3192, GNorm = 0.7487, lr_0 = 1.9774e-04
Loss = 8.1787e-02, PNorm = 83.3212, GNorm = 0.5261, lr_0 = 1.9761e-04
Loss = 8.5547e-02, PNorm = 83.3239, GNorm = 0.7064, lr_0 = 1.9747e-04
Loss = 7.7491e-02, PNorm = 83.3261, GNorm = 0.6566, lr_0 = 1.9734e-04
Loss = 8.9309e-02, PNorm = 83.3288, GNorm = 0.7814, lr_0 = 1.9720e-04
Loss = 8.4633e-02, PNorm = 83.3327, GNorm = 0.9172, lr_0 = 1.9707e-04
Loss = 8.3554e-02, PNorm = 83.3325, GNorm = 0.6922, lr_0 = 1.9693e-04
Loss = 8.7345e-02, PNorm = 83.3353, GNorm = 0.6123, lr_0 = 1.9680e-04
Loss = 1.0342e-01, PNorm = 83.3410, GNorm = 0.7395, lr_0 = 1.9666e-04
Loss = 8.1315e-02, PNorm = 83.3449, GNorm = 1.0162, lr_0 = 1.9653e-04
Loss = 8.5055e-02, PNorm = 83.3478, GNorm = 0.7776, lr_0 = 1.9639e-04
Loss = 8.5888e-02, PNorm = 83.3516, GNorm = 0.6731, lr_0 = 1.9626e-04
Loss = 8.2377e-02, PNorm = 83.3530, GNorm = 0.6937, lr_0 = 1.9612e-04
Loss = 8.2973e-02, PNorm = 83.3538, GNorm = 0.6426, lr_0 = 1.9599e-04
Loss = 1.0193e-01, PNorm = 83.3592, GNorm = 0.5507, lr_0 = 1.9585e-04
Loss = 7.9950e-02, PNorm = 83.3625, GNorm = 0.8843, lr_0 = 1.9572e-04
Loss = 9.6339e-02, PNorm = 83.3651, GNorm = 0.6038, lr_0 = 1.9559e-04
Loss = 8.7965e-02, PNorm = 83.3707, GNorm = 0.6431, lr_0 = 1.9545e-04
Loss = 8.1004e-02, PNorm = 83.3754, GNorm = 0.7339, lr_0 = 1.9532e-04
Loss = 9.9721e-02, PNorm = 83.3801, GNorm = 0.6733, lr_0 = 1.9518e-04
Loss = 8.3349e-02, PNorm = 83.3844, GNorm = 0.7199, lr_0 = 1.9505e-04
Loss = 9.2222e-02, PNorm = 83.3864, GNorm = 0.5499, lr_0 = 1.9492e-04
Loss = 9.4798e-02, PNorm = 83.3874, GNorm = 0.6265, lr_0 = 1.9478e-04
Loss = 8.7515e-02, PNorm = 83.3897, GNorm = 0.9050, lr_0 = 1.9465e-04
Loss = 9.9434e-02, PNorm = 83.3934, GNorm = 0.5286, lr_0 = 1.9452e-04
Loss = 9.1091e-02, PNorm = 83.3961, GNorm = 0.8085, lr_0 = 1.9438e-04
Loss = 9.3263e-02, PNorm = 83.4009, GNorm = 0.8160, lr_0 = 1.9425e-04
Loss = 8.3376e-02, PNorm = 83.4052, GNorm = 0.4460, lr_0 = 1.9412e-04
Loss = 9.0566e-02, PNorm = 83.4084, GNorm = 0.5822, lr_0 = 1.9398e-04
Loss = 9.1913e-02, PNorm = 83.4106, GNorm = 0.5783, lr_0 = 1.9385e-04
Loss = 9.3755e-02, PNorm = 83.4128, GNorm = 0.5968, lr_0 = 1.9372e-04
Loss = 8.5147e-02, PNorm = 83.4148, GNorm = 0.7799, lr_0 = 1.9359e-04
Loss = 9.2633e-02, PNorm = 83.4148, GNorm = 0.4895, lr_0 = 1.9345e-04
Loss = 9.1735e-02, PNorm = 83.4164, GNorm = 0.6002, lr_0 = 1.9332e-04
Loss = 8.2096e-02, PNorm = 83.4182, GNorm = 1.0867, lr_0 = 1.9319e-04
Loss = 9.0891e-02, PNorm = 83.4197, GNorm = 0.5157, lr_0 = 1.9306e-04
Validation mae = 0.227028
Epoch 22
Loss = 8.8824e-02, PNorm = 83.4250, GNorm = 0.5152, lr_0 = 1.9292e-04
Loss = 8.8853e-02, PNorm = 83.4298, GNorm = 1.0140, lr_0 = 1.9279e-04
Loss = 8.7670e-02, PNorm = 83.4331, GNorm = 0.7014, lr_0 = 1.9266e-04
Loss = 9.0102e-02, PNorm = 83.4360, GNorm = 0.6641, lr_0 = 1.9253e-04
Loss = 9.4394e-02, PNorm = 83.4383, GNorm = 0.5351, lr_0 = 1.9240e-04
Loss = 9.0116e-02, PNorm = 83.4416, GNorm = 0.7369, lr_0 = 1.9226e-04
Loss = 8.1821e-02, PNorm = 83.4443, GNorm = 0.5796, lr_0 = 1.9213e-04
Loss = 9.2375e-02, PNorm = 83.4475, GNorm = 0.6480, lr_0 = 1.9200e-04
Loss = 8.3349e-02, PNorm = 83.4507, GNorm = 0.5436, lr_0 = 1.9187e-04
Loss = 8.1311e-02, PNorm = 83.4563, GNorm = 0.6343, lr_0 = 1.9174e-04
Loss = 9.1536e-02, PNorm = 83.4623, GNorm = 0.4815, lr_0 = 1.9161e-04
Loss = 9.4255e-02, PNorm = 83.4652, GNorm = 1.1996, lr_0 = 1.9148e-04
Loss = 1.0217e-01, PNorm = 83.4689, GNorm = 0.7250, lr_0 = 1.9134e-04
Loss = 8.2541e-02, PNorm = 83.4723, GNorm = 0.4972, lr_0 = 1.9121e-04
Loss = 9.0327e-02, PNorm = 83.4756, GNorm = 0.6765, lr_0 = 1.9108e-04
Loss = 8.7980e-02, PNorm = 83.4788, GNorm = 0.7061, lr_0 = 1.9095e-04
Loss = 8.4584e-02, PNorm = 83.4830, GNorm = 0.6570, lr_0 = 1.9082e-04
Loss = 7.5540e-02, PNorm = 83.4875, GNorm = 0.6923, lr_0 = 1.9069e-04
Loss = 9.1373e-02, PNorm = 83.4905, GNorm = 0.7428, lr_0 = 1.9056e-04
Loss = 8.9492e-02, PNorm = 83.4934, GNorm = 0.5765, lr_0 = 1.9043e-04
Loss = 8.3524e-02, PNorm = 83.4973, GNorm = 0.6771, lr_0 = 1.9030e-04
Loss = 7.8489e-02, PNorm = 83.4997, GNorm = 0.5535, lr_0 = 1.9017e-04
Loss = 7.8270e-02, PNorm = 83.5040, GNorm = 0.5682, lr_0 = 1.9004e-04
Loss = 9.8973e-02, PNorm = 83.5078, GNorm = 0.6504, lr_0 = 1.8991e-04
Loss = 9.7811e-02, PNorm = 83.5118, GNorm = 0.6485, lr_0 = 1.8978e-04
Loss = 9.7788e-02, PNorm = 83.5159, GNorm = 0.5690, lr_0 = 1.8965e-04
Loss = 8.9661e-02, PNorm = 83.5202, GNorm = 0.7400, lr_0 = 1.8952e-04
Loss = 8.1779e-02, PNorm = 83.5238, GNorm = 1.0670, lr_0 = 1.8939e-04
Loss = 9.1515e-02, PNorm = 83.5226, GNorm = 0.6704, lr_0 = 1.8926e-04
Loss = 8.4320e-02, PNorm = 83.5244, GNorm = 0.6696, lr_0 = 1.8913e-04
Loss = 8.9100e-02, PNorm = 83.5269, GNorm = 0.5960, lr_0 = 1.8900e-04
Loss = 8.9337e-02, PNorm = 83.5301, GNorm = 0.8771, lr_0 = 1.8887e-04
Loss = 8.5027e-02, PNorm = 83.5346, GNorm = 0.5585, lr_0 = 1.8874e-04
Loss = 9.3987e-02, PNorm = 83.5379, GNorm = 1.0239, lr_0 = 1.8861e-04
Loss = 7.8961e-02, PNorm = 83.5406, GNorm = 0.6441, lr_0 = 1.8848e-04
Loss = 9.6512e-02, PNorm = 83.5444, GNorm = 0.9091, lr_0 = 1.8835e-04
Loss = 9.4625e-02, PNorm = 83.5475, GNorm = 0.6408, lr_0 = 1.8822e-04
Loss = 8.4527e-02, PNorm = 83.5530, GNorm = 0.6832, lr_0 = 1.8809e-04
Loss = 7.9690e-02, PNorm = 83.5570, GNorm = 0.4373, lr_0 = 1.8797e-04
Loss = 8.9870e-02, PNorm = 83.5575, GNorm = 0.5691, lr_0 = 1.8784e-04
Loss = 7.9388e-02, PNorm = 83.5602, GNorm = 0.7824, lr_0 = 1.8771e-04
Loss = 7.8481e-02, PNorm = 83.5641, GNorm = 0.5304, lr_0 = 1.8758e-04
Loss = 9.3756e-02, PNorm = 83.5671, GNorm = 0.6469, lr_0 = 1.8745e-04
Loss = 8.4757e-02, PNorm = 83.5687, GNorm = 0.4780, lr_0 = 1.8732e-04
Loss = 8.6364e-02, PNorm = 83.5713, GNorm = 0.7418, lr_0 = 1.8719e-04
Loss = 9.7620e-02, PNorm = 83.5739, GNorm = 0.6904, lr_0 = 1.8707e-04
Loss = 9.3463e-02, PNorm = 83.5765, GNorm = 0.6826, lr_0 = 1.8694e-04
Loss = 7.1863e-02, PNorm = 83.5796, GNorm = 0.5742, lr_0 = 1.8681e-04
Loss = 9.8354e-02, PNorm = 83.5825, GNorm = 0.7059, lr_0 = 1.8668e-04
Loss = 1.0155e-01, PNorm = 83.5845, GNorm = 0.8169, lr_0 = 1.8655e-04
Loss = 7.6507e-02, PNorm = 83.5853, GNorm = 0.5704, lr_0 = 1.8643e-04
Loss = 7.8956e-02, PNorm = 83.5894, GNorm = 0.5576, lr_0 = 1.8630e-04
Loss = 7.7114e-02, PNorm = 83.5935, GNorm = 0.5328, lr_0 = 1.8617e-04
Loss = 7.9057e-02, PNorm = 83.5961, GNorm = 0.6074, lr_0 = 1.8604e-04
Loss = 8.1286e-02, PNorm = 83.5968, GNorm = 0.5865, lr_0 = 1.8592e-04
Loss = 9.2776e-02, PNorm = 83.6017, GNorm = 0.7828, lr_0 = 1.8579e-04
Loss = 9.0891e-02, PNorm = 83.6067, GNorm = 0.6022, lr_0 = 1.8566e-04
Loss = 8.5844e-02, PNorm = 83.6079, GNorm = 0.6781, lr_0 = 1.8553e-04
Loss = 9.3194e-02, PNorm = 83.6111, GNorm = 0.7517, lr_0 = 1.8541e-04
Loss = 8.9948e-02, PNorm = 83.6145, GNorm = 0.7069, lr_0 = 1.8528e-04
Loss = 8.7458e-02, PNorm = 83.6166, GNorm = 0.6032, lr_0 = 1.8515e-04
Loss = 7.9761e-02, PNorm = 83.6208, GNorm = 0.6620, lr_0 = 1.8503e-04
Loss = 9.7482e-02, PNorm = 83.6233, GNorm = 0.7388, lr_0 = 1.8490e-04
Loss = 8.8077e-02, PNorm = 83.6266, GNorm = 0.5481, lr_0 = 1.8477e-04
Loss = 8.2002e-02, PNorm = 83.6286, GNorm = 0.6315, lr_0 = 1.8465e-04
Loss = 8.3266e-02, PNorm = 83.6309, GNorm = 0.5658, lr_0 = 1.8452e-04
Loss = 7.5380e-02, PNorm = 83.6326, GNorm = 0.8478, lr_0 = 1.8439e-04
Loss = 7.6850e-02, PNorm = 83.6348, GNorm = 0.6462, lr_0 = 1.8427e-04
Loss = 8.0134e-02, PNorm = 83.6378, GNorm = 0.4899, lr_0 = 1.8414e-04
Loss = 8.4861e-02, PNorm = 83.6400, GNorm = 0.8882, lr_0 = 1.8401e-04
Loss = 9.0455e-02, PNorm = 83.6425, GNorm = 0.7600, lr_0 = 1.8389e-04
Loss = 9.1164e-02, PNorm = 83.6461, GNorm = 0.7071, lr_0 = 1.8376e-04
Loss = 1.0764e-01, PNorm = 83.6519, GNorm = 0.7555, lr_0 = 1.8364e-04
Loss = 8.8793e-02, PNorm = 83.6539, GNorm = 0.5195, lr_0 = 1.8351e-04
Loss = 8.2157e-02, PNorm = 83.6583, GNorm = 0.8218, lr_0 = 1.8338e-04
Loss = 9.3336e-02, PNorm = 83.6622, GNorm = 0.6749, lr_0 = 1.8326e-04
Loss = 8.2645e-02, PNorm = 83.6648, GNorm = 0.5298, lr_0 = 1.8313e-04
Loss = 9.0620e-02, PNorm = 83.6690, GNorm = 0.5283, lr_0 = 1.8301e-04
Loss = 8.3389e-02, PNorm = 83.6740, GNorm = 0.7514, lr_0 = 1.8288e-04
Loss = 7.7185e-02, PNorm = 83.6777, GNorm = 0.5702, lr_0 = 1.8276e-04
Loss = 8.3883e-02, PNorm = 83.6791, GNorm = 0.6365, lr_0 = 1.8263e-04
Loss = 8.9863e-02, PNorm = 83.6849, GNorm = 0.6807, lr_0 = 1.8251e-04
Loss = 9.2201e-02, PNorm = 83.6855, GNorm = 0.6870, lr_0 = 1.8238e-04
Loss = 9.6357e-02, PNorm = 83.6857, GNorm = 0.5843, lr_0 = 1.8226e-04
Loss = 8.3532e-02, PNorm = 83.6900, GNorm = 0.7946, lr_0 = 1.8213e-04
Loss = 9.7556e-02, PNorm = 83.6936, GNorm = 0.6303, lr_0 = 1.8201e-04
Loss = 9.5434e-02, PNorm = 83.6949, GNorm = 0.5553, lr_0 = 1.8188e-04
Loss = 9.6438e-02, PNorm = 83.6988, GNorm = 0.5268, lr_0 = 1.8176e-04
Loss = 8.3206e-02, PNorm = 83.7005, GNorm = 0.6629, lr_0 = 1.8163e-04
Loss = 8.9824e-02, PNorm = 83.7038, GNorm = 0.5592, lr_0 = 1.8151e-04
Loss = 9.8984e-02, PNorm = 83.7077, GNorm = 0.9173, lr_0 = 1.8138e-04
Loss = 8.4945e-02, PNorm = 83.7131, GNorm = 0.7009, lr_0 = 1.8126e-04
Loss = 8.1344e-02, PNorm = 83.7148, GNorm = 0.7024, lr_0 = 1.8114e-04
Loss = 8.7222e-02, PNorm = 83.7185, GNorm = 0.6021, lr_0 = 1.8101e-04
Loss = 9.8392e-02, PNorm = 83.7226, GNorm = 0.5686, lr_0 = 1.8089e-04
Loss = 9.5365e-02, PNorm = 83.7264, GNorm = 0.7587, lr_0 = 1.8076e-04
Loss = 9.0940e-02, PNorm = 83.7293, GNorm = 0.6828, lr_0 = 1.8064e-04
Loss = 9.1375e-02, PNorm = 83.7313, GNorm = 0.7970, lr_0 = 1.8052e-04
Loss = 8.3458e-02, PNorm = 83.7326, GNorm = 0.7526, lr_0 = 1.8039e-04
Loss = 8.1277e-02, PNorm = 83.7372, GNorm = 0.5066, lr_0 = 1.8027e-04
Loss = 9.2283e-02, PNorm = 83.7392, GNorm = 0.6190, lr_0 = 1.8015e-04
Loss = 8.6372e-02, PNorm = 83.7422, GNorm = 0.6547, lr_0 = 1.8002e-04
Loss = 9.2023e-02, PNorm = 83.7469, GNorm = 0.6824, lr_0 = 1.7990e-04
Loss = 7.8590e-02, PNorm = 83.7484, GNorm = 0.5385, lr_0 = 1.7978e-04
Loss = 7.8175e-02, PNorm = 83.7501, GNorm = 0.8567, lr_0 = 1.7965e-04
Loss = 8.1405e-02, PNorm = 83.7530, GNorm = 0.6111, lr_0 = 1.7953e-04
Loss = 9.1216e-02, PNorm = 83.7564, GNorm = 0.6974, lr_0 = 1.7941e-04
Loss = 8.9416e-02, PNorm = 83.7576, GNorm = 1.1943, lr_0 = 1.7928e-04
Loss = 7.8249e-02, PNorm = 83.7600, GNorm = 0.5318, lr_0 = 1.7916e-04
Loss = 8.6843e-02, PNorm = 83.7632, GNorm = 0.5502, lr_0 = 1.7904e-04
Loss = 7.7647e-02, PNorm = 83.7654, GNorm = 0.5432, lr_0 = 1.7892e-04
Loss = 9.4502e-02, PNorm = 83.7681, GNorm = 0.6032, lr_0 = 1.7879e-04
Loss = 8.6249e-02, PNorm = 83.7690, GNorm = 0.3766, lr_0 = 1.7867e-04
Loss = 9.6102e-02, PNorm = 83.7718, GNorm = 0.7043, lr_0 = 1.7855e-04
Loss = 9.2954e-02, PNorm = 83.7751, GNorm = 0.7388, lr_0 = 1.7843e-04
Loss = 8.1046e-02, PNorm = 83.7768, GNorm = 0.6756, lr_0 = 1.7830e-04
Loss = 8.7589e-02, PNorm = 83.7789, GNorm = 0.6318, lr_0 = 1.7818e-04
Loss = 8.7510e-02, PNorm = 83.7823, GNorm = 0.7387, lr_0 = 1.7806e-04
Loss = 9.3724e-02, PNorm = 83.7870, GNorm = 0.8733, lr_0 = 1.7794e-04
Loss = 9.1036e-02, PNorm = 83.7900, GNorm = 0.5305, lr_0 = 1.7782e-04
Validation mae = 0.227687
Epoch 23
Loss = 9.0908e-02, PNorm = 83.7919, GNorm = 0.5994, lr_0 = 1.7769e-04
Loss = 7.7799e-02, PNorm = 83.7960, GNorm = 0.5292, lr_0 = 1.7757e-04
Loss = 9.9519e-02, PNorm = 83.7998, GNorm = 0.6609, lr_0 = 1.7745e-04
Loss = 7.2806e-02, PNorm = 83.8008, GNorm = 0.4447, lr_0 = 1.7733e-04
Loss = 7.8522e-02, PNorm = 83.8046, GNorm = 0.5453, lr_0 = 1.7721e-04
Loss = 8.8505e-02, PNorm = 83.8095, GNorm = 0.6311, lr_0 = 1.7709e-04
Loss = 7.7992e-02, PNorm = 83.8131, GNorm = 0.6330, lr_0 = 1.7696e-04
Loss = 8.2209e-02, PNorm = 83.8153, GNorm = 0.6419, lr_0 = 1.7684e-04
Loss = 7.6958e-02, PNorm = 83.8169, GNorm = 0.6498, lr_0 = 1.7672e-04
Loss = 7.8064e-02, PNorm = 83.8182, GNorm = 0.7881, lr_0 = 1.7660e-04
Loss = 8.1158e-02, PNorm = 83.8212, GNorm = 0.5479, lr_0 = 1.7648e-04
Loss = 7.9216e-02, PNorm = 83.8249, GNorm = 0.6181, lr_0 = 1.7636e-04
Loss = 7.6685e-02, PNorm = 83.8288, GNorm = 0.6092, lr_0 = 1.7624e-04
Loss = 9.0645e-02, PNorm = 83.8315, GNorm = 0.4931, lr_0 = 1.7612e-04
Loss = 8.7311e-02, PNorm = 83.8344, GNorm = 0.5562, lr_0 = 1.7600e-04
Loss = 8.2259e-02, PNorm = 83.8363, GNorm = 0.5421, lr_0 = 1.7588e-04
Loss = 8.9464e-02, PNorm = 83.8383, GNorm = 0.7393, lr_0 = 1.7576e-04
Loss = 7.7965e-02, PNorm = 83.8422, GNorm = 0.5073, lr_0 = 1.7564e-04
Loss = 7.4864e-02, PNorm = 83.8464, GNorm = 0.7854, lr_0 = 1.7552e-04
Loss = 8.1619e-02, PNorm = 83.8503, GNorm = 0.6310, lr_0 = 1.7540e-04
Loss = 8.7770e-02, PNorm = 83.8545, GNorm = 0.5673, lr_0 = 1.7528e-04
Loss = 8.8758e-02, PNorm = 83.8562, GNorm = 0.6796, lr_0 = 1.7516e-04
Loss = 7.7186e-02, PNorm = 83.8587, GNorm = 0.6126, lr_0 = 1.7504e-04
Loss = 8.2358e-02, PNorm = 83.8639, GNorm = 0.8596, lr_0 = 1.7492e-04
Loss = 7.8988e-02, PNorm = 83.8669, GNorm = 0.5445, lr_0 = 1.7480e-04
Loss = 9.4512e-02, PNorm = 83.8691, GNorm = 1.0171, lr_0 = 1.7468e-04
Loss = 7.6866e-02, PNorm = 83.8707, GNorm = 0.5788, lr_0 = 1.7456e-04
Loss = 8.7167e-02, PNorm = 83.8759, GNorm = 0.6300, lr_0 = 1.7444e-04
Loss = 8.1637e-02, PNorm = 83.8813, GNorm = 0.5205, lr_0 = 1.7432e-04
Loss = 8.9663e-02, PNorm = 83.8853, GNorm = 0.5969, lr_0 = 1.7420e-04
Loss = 8.2742e-02, PNorm = 83.8897, GNorm = 0.6461, lr_0 = 1.7408e-04
Loss = 8.9607e-02, PNorm = 83.8941, GNorm = 0.7042, lr_0 = 1.7396e-04
Loss = 8.9254e-02, PNorm = 83.8973, GNorm = 1.1122, lr_0 = 1.7384e-04
Loss = 8.7970e-02, PNorm = 83.8971, GNorm = 0.8663, lr_0 = 1.7372e-04
Loss = 9.8826e-02, PNorm = 83.8973, GNorm = 0.5236, lr_0 = 1.7360e-04
Loss = 8.1476e-02, PNorm = 83.9023, GNorm = 0.8183, lr_0 = 1.7348e-04
Loss = 7.5768e-02, PNorm = 83.9048, GNorm = 0.9296, lr_0 = 1.7336e-04
Loss = 9.1757e-02, PNorm = 83.9093, GNorm = 0.6832, lr_0 = 1.7325e-04
Loss = 8.6867e-02, PNorm = 83.9151, GNorm = 0.8169, lr_0 = 1.7313e-04
Loss = 8.3605e-02, PNorm = 83.9185, GNorm = 0.6344, lr_0 = 1.7301e-04
Loss = 8.4386e-02, PNorm = 83.9212, GNorm = 0.5885, lr_0 = 1.7289e-04
Loss = 9.0935e-02, PNorm = 83.9229, GNorm = 0.5191, lr_0 = 1.7277e-04
Loss = 7.3289e-02, PNorm = 83.9253, GNorm = 0.6015, lr_0 = 1.7265e-04
Loss = 7.7071e-02, PNorm = 83.9262, GNorm = 0.5682, lr_0 = 1.7253e-04
Loss = 8.9885e-02, PNorm = 83.9291, GNorm = 0.6170, lr_0 = 1.7242e-04
Loss = 7.6580e-02, PNorm = 83.9297, GNorm = 0.6362, lr_0 = 1.7230e-04
Loss = 7.6652e-02, PNorm = 83.9304, GNorm = 0.9461, lr_0 = 1.7218e-04
Loss = 1.0030e-01, PNorm = 83.9326, GNorm = 0.7236, lr_0 = 1.7206e-04
Loss = 8.1137e-02, PNorm = 83.9353, GNorm = 0.5832, lr_0 = 1.7194e-04
Loss = 8.3329e-02, PNorm = 83.9390, GNorm = 0.5986, lr_0 = 1.7183e-04
Loss = 8.0695e-02, PNorm = 83.9394, GNorm = 0.5757, lr_0 = 1.7171e-04
Loss = 9.2246e-02, PNorm = 83.9424, GNorm = 0.4685, lr_0 = 1.7159e-04
Loss = 8.0275e-02, PNorm = 83.9467, GNorm = 0.5398, lr_0 = 1.7147e-04
Loss = 8.1552e-02, PNorm = 83.9505, GNorm = 0.5248, lr_0 = 1.7136e-04
Loss = 8.6695e-02, PNorm = 83.9529, GNorm = 0.5757, lr_0 = 1.7124e-04
Loss = 8.4711e-02, PNorm = 83.9558, GNorm = 0.7249, lr_0 = 1.7112e-04
Loss = 8.4011e-02, PNorm = 83.9586, GNorm = 0.4589, lr_0 = 1.7100e-04
Loss = 9.1272e-02, PNorm = 83.9623, GNorm = 0.4996, lr_0 = 1.7089e-04
Loss = 8.6489e-02, PNorm = 83.9654, GNorm = 0.6775, lr_0 = 1.7077e-04
Loss = 7.7493e-02, PNorm = 83.9667, GNorm = 0.5545, lr_0 = 1.7065e-04
Loss = 7.9523e-02, PNorm = 83.9685, GNorm = 0.4750, lr_0 = 1.7054e-04
Loss = 8.6831e-02, PNorm = 83.9704, GNorm = 0.6371, lr_0 = 1.7042e-04
Loss = 7.3433e-02, PNorm = 83.9714, GNorm = 0.6203, lr_0 = 1.7030e-04
Loss = 8.5304e-02, PNorm = 83.9733, GNorm = 0.7621, lr_0 = 1.7019e-04
Loss = 8.9025e-02, PNorm = 83.9755, GNorm = 0.6424, lr_0 = 1.7007e-04
Loss = 8.1601e-02, PNorm = 83.9773, GNorm = 0.7385, lr_0 = 1.6995e-04
Loss = 9.1458e-02, PNorm = 83.9806, GNorm = 0.5720, lr_0 = 1.6984e-04
Loss = 9.1084e-02, PNorm = 83.9847, GNorm = 0.7484, lr_0 = 1.6972e-04
Loss = 8.9396e-02, PNorm = 83.9878, GNorm = 0.7642, lr_0 = 1.6960e-04
Loss = 9.5451e-02, PNorm = 83.9911, GNorm = 0.6389, lr_0 = 1.6949e-04
Loss = 7.7027e-02, PNorm = 83.9958, GNorm = 0.7614, lr_0 = 1.6937e-04
Loss = 8.0574e-02, PNorm = 83.9992, GNorm = 0.6218, lr_0 = 1.6926e-04
Loss = 7.5021e-02, PNorm = 84.0028, GNorm = 0.7703, lr_0 = 1.6914e-04
Loss = 7.5658e-02, PNorm = 84.0039, GNorm = 0.6464, lr_0 = 1.6902e-04
Loss = 9.3544e-02, PNorm = 84.0065, GNorm = 0.8443, lr_0 = 1.6891e-04
Loss = 8.1564e-02, PNorm = 84.0093, GNorm = 0.5248, lr_0 = 1.6879e-04
Loss = 8.0566e-02, PNorm = 84.0116, GNorm = 0.7728, lr_0 = 1.6868e-04
Loss = 8.0919e-02, PNorm = 84.0145, GNorm = 0.6890, lr_0 = 1.6856e-04
Loss = 8.7329e-02, PNorm = 84.0155, GNorm = 0.4722, lr_0 = 1.6845e-04
Loss = 8.7676e-02, PNorm = 84.0185, GNorm = 0.7841, lr_0 = 1.6833e-04
Loss = 8.3720e-02, PNorm = 84.0193, GNorm = 0.5980, lr_0 = 1.6821e-04
Loss = 9.0970e-02, PNorm = 84.0195, GNorm = 0.6464, lr_0 = 1.6810e-04
Loss = 8.2410e-02, PNorm = 84.0219, GNorm = 0.5587, lr_0 = 1.6798e-04
Loss = 8.9205e-02, PNorm = 84.0250, GNorm = 0.5656, lr_0 = 1.6787e-04
Loss = 9.3590e-02, PNorm = 84.0284, GNorm = 0.6679, lr_0 = 1.6775e-04
Loss = 8.4992e-02, PNorm = 84.0321, GNorm = 0.7501, lr_0 = 1.6764e-04
Loss = 9.2810e-02, PNorm = 84.0357, GNorm = 0.7927, lr_0 = 1.6752e-04
Loss = 9.0208e-02, PNorm = 84.0400, GNorm = 0.6609, lr_0 = 1.6741e-04
Loss = 7.7274e-02, PNorm = 84.0421, GNorm = 0.4816, lr_0 = 1.6729e-04
Loss = 8.6331e-02, PNorm = 84.0433, GNorm = 0.8677, lr_0 = 1.6718e-04
Loss = 8.9377e-02, PNorm = 84.0458, GNorm = 0.5964, lr_0 = 1.6707e-04
Loss = 9.7871e-02, PNorm = 84.0498, GNorm = 0.5784, lr_0 = 1.6695e-04
Loss = 7.7001e-02, PNorm = 84.0507, GNorm = 0.5142, lr_0 = 1.6684e-04
Loss = 8.7067e-02, PNorm = 84.0525, GNorm = 0.5042, lr_0 = 1.6672e-04
Loss = 9.0188e-02, PNorm = 84.0556, GNorm = 0.7203, lr_0 = 1.6661e-04
Loss = 1.0131e-01, PNorm = 84.0596, GNorm = 0.6119, lr_0 = 1.6649e-04
Loss = 8.7555e-02, PNorm = 84.0633, GNorm = 0.7830, lr_0 = 1.6638e-04
Loss = 7.7914e-02, PNorm = 84.0665, GNorm = 0.6050, lr_0 = 1.6627e-04
Loss = 9.4535e-02, PNorm = 84.0681, GNorm = 0.7854, lr_0 = 1.6615e-04
Loss = 8.0212e-02, PNorm = 84.0678, GNorm = 0.8759, lr_0 = 1.6604e-04
Loss = 8.1866e-02, PNorm = 84.0707, GNorm = 1.1623, lr_0 = 1.6592e-04
Loss = 9.3478e-02, PNorm = 84.0715, GNorm = 0.8828, lr_0 = 1.6581e-04
Loss = 9.1126e-02, PNorm = 84.0753, GNorm = 0.6244, lr_0 = 1.6570e-04
Loss = 8.0919e-02, PNorm = 84.0774, GNorm = 0.7260, lr_0 = 1.6558e-04
Loss = 8.9170e-02, PNorm = 84.0806, GNorm = 0.4767, lr_0 = 1.6547e-04
Loss = 8.9598e-02, PNorm = 84.0855, GNorm = 0.8529, lr_0 = 1.6536e-04
Loss = 7.7879e-02, PNorm = 84.0885, GNorm = 0.5686, lr_0 = 1.6524e-04
Loss = 8.3459e-02, PNorm = 84.0898, GNorm = 0.6777, lr_0 = 1.6513e-04
Loss = 8.7513e-02, PNorm = 84.0929, GNorm = 0.7712, lr_0 = 1.6502e-04
Loss = 8.9118e-02, PNorm = 84.0960, GNorm = 0.7832, lr_0 = 1.6490e-04
Loss = 9.0069e-02, PNorm = 84.1010, GNorm = 0.7595, lr_0 = 1.6479e-04
Loss = 1.0463e-01, PNorm = 84.1053, GNorm = 0.6976, lr_0 = 1.6468e-04
Loss = 8.5122e-02, PNorm = 84.1077, GNorm = 0.6704, lr_0 = 1.6457e-04
Loss = 8.6733e-02, PNorm = 84.1106, GNorm = 0.5344, lr_0 = 1.6445e-04
Loss = 7.5906e-02, PNorm = 84.1137, GNorm = 0.6081, lr_0 = 1.6434e-04
Loss = 9.3960e-02, PNorm = 84.1134, GNorm = 0.6664, lr_0 = 1.6423e-04
Loss = 8.6390e-02, PNorm = 84.1151, GNorm = 0.6781, lr_0 = 1.6412e-04
Loss = 8.2768e-02, PNorm = 84.1172, GNorm = 0.4551, lr_0 = 1.6400e-04
Loss = 8.4075e-02, PNorm = 84.1194, GNorm = 0.6613, lr_0 = 1.6389e-04
Loss = 9.4594e-02, PNorm = 84.1208, GNorm = 0.7047, lr_0 = 1.6378e-04
Validation mae = 0.228004
Epoch 24
Loss = 7.4096e-02, PNorm = 84.1228, GNorm = 0.5562, lr_0 = 1.6367e-04
Loss = 9.2747e-02, PNorm = 84.1267, GNorm = 0.5741, lr_0 = 1.6355e-04
Loss = 7.6382e-02, PNorm = 84.1282, GNorm = 0.4919, lr_0 = 1.6344e-04
Loss = 8.5014e-02, PNorm = 84.1309, GNorm = 0.6464, lr_0 = 1.6333e-04
Loss = 7.6441e-02, PNorm = 84.1344, GNorm = 0.6330, lr_0 = 1.6322e-04
Loss = 8.3992e-02, PNorm = 84.1363, GNorm = 0.8433, lr_0 = 1.6311e-04
Loss = 8.5185e-02, PNorm = 84.1399, GNorm = 0.8411, lr_0 = 1.6299e-04
Loss = 7.7354e-02, PNorm = 84.1427, GNorm = 0.5186, lr_0 = 1.6288e-04
Loss = 7.3692e-02, PNorm = 84.1450, GNorm = 0.4389, lr_0 = 1.6277e-04
Loss = 8.0315e-02, PNorm = 84.1470, GNorm = 0.6484, lr_0 = 1.6266e-04
Loss = 7.7487e-02, PNorm = 84.1496, GNorm = 0.6229, lr_0 = 1.6255e-04
Loss = 6.6913e-02, PNorm = 84.1525, GNorm = 0.5439, lr_0 = 1.6244e-04
Loss = 1.0158e-01, PNorm = 84.1546, GNorm = 0.6207, lr_0 = 1.6233e-04
Loss = 8.4405e-02, PNorm = 84.1577, GNorm = 0.6136, lr_0 = 1.6221e-04
Loss = 8.2735e-02, PNorm = 84.1615, GNorm = 0.4102, lr_0 = 1.6210e-04
Loss = 7.9167e-02, PNorm = 84.1637, GNorm = 0.5441, lr_0 = 1.6199e-04
Loss = 7.1916e-02, PNorm = 84.1658, GNorm = 0.5402, lr_0 = 1.6188e-04
Loss = 8.2388e-02, PNorm = 84.1684, GNorm = 0.5412, lr_0 = 1.6177e-04
Loss = 6.8918e-02, PNorm = 84.1696, GNorm = 0.4678, lr_0 = 1.6166e-04
Loss = 8.2501e-02, PNorm = 84.1724, GNorm = 0.5997, lr_0 = 1.6155e-04
Loss = 8.7638e-02, PNorm = 84.1756, GNorm = 0.5898, lr_0 = 1.6144e-04
Loss = 9.2540e-02, PNorm = 84.1789, GNorm = 0.5546, lr_0 = 1.6133e-04
Loss = 6.8341e-02, PNorm = 84.1827, GNorm = 0.5740, lr_0 = 1.6122e-04
Loss = 8.6570e-02, PNorm = 84.1870, GNorm = 0.7370, lr_0 = 1.6111e-04
Loss = 7.4181e-02, PNorm = 84.1895, GNorm = 0.5471, lr_0 = 1.6100e-04
Loss = 9.3046e-02, PNorm = 84.1925, GNorm = 0.7524, lr_0 = 1.6089e-04
Loss = 7.9889e-02, PNorm = 84.1973, GNorm = 0.6077, lr_0 = 1.6078e-04
Loss = 7.3968e-02, PNorm = 84.2010, GNorm = 0.4574, lr_0 = 1.6067e-04
Loss = 7.9738e-02, PNorm = 84.2020, GNorm = 0.4952, lr_0 = 1.6056e-04
Loss = 7.5260e-02, PNorm = 84.2045, GNorm = 0.5153, lr_0 = 1.6045e-04
Loss = 7.6608e-02, PNorm = 84.2076, GNorm = 0.4815, lr_0 = 1.6034e-04
Loss = 8.0251e-02, PNorm = 84.2117, GNorm = 0.4115, lr_0 = 1.6023e-04
Loss = 9.6370e-02, PNorm = 84.2164, GNorm = 0.7940, lr_0 = 1.6012e-04
Loss = 8.2537e-02, PNorm = 84.2183, GNorm = 0.4759, lr_0 = 1.6001e-04
Loss = 8.2767e-02, PNorm = 84.2202, GNorm = 0.6086, lr_0 = 1.5990e-04
Loss = 8.3331e-02, PNorm = 84.2235, GNorm = 0.5384, lr_0 = 1.5979e-04
Loss = 7.4985e-02, PNorm = 84.2238, GNorm = 0.5311, lr_0 = 1.5968e-04
Loss = 7.3119e-02, PNorm = 84.2242, GNorm = 0.6878, lr_0 = 1.5957e-04
Loss = 6.7305e-02, PNorm = 84.2269, GNorm = 0.7324, lr_0 = 1.5946e-04
Loss = 7.9328e-02, PNorm = 84.2302, GNorm = 0.6711, lr_0 = 1.5935e-04
Loss = 8.5423e-02, PNorm = 84.2310, GNorm = 0.5700, lr_0 = 1.5924e-04
Loss = 8.0551e-02, PNorm = 84.2326, GNorm = 0.5300, lr_0 = 1.5913e-04
Loss = 9.2875e-02, PNorm = 84.2372, GNorm = 0.6713, lr_0 = 1.5902e-04
Loss = 8.2053e-02, PNorm = 84.2415, GNorm = 0.5881, lr_0 = 1.5891e-04
Loss = 8.9862e-02, PNorm = 84.2443, GNorm = 0.6464, lr_0 = 1.5880e-04
Loss = 8.0402e-02, PNorm = 84.2469, GNorm = 0.8420, lr_0 = 1.5870e-04
Loss = 9.0741e-02, PNorm = 84.2507, GNorm = 0.6861, lr_0 = 1.5859e-04
Loss = 7.3419e-02, PNorm = 84.2531, GNorm = 0.4833, lr_0 = 1.5848e-04
Loss = 8.6026e-02, PNorm = 84.2562, GNorm = 0.6006, lr_0 = 1.5837e-04
Loss = 8.9694e-02, PNorm = 84.2608, GNorm = 0.7255, lr_0 = 1.5826e-04
Loss = 9.8832e-02, PNorm = 84.2641, GNorm = 0.6297, lr_0 = 1.5815e-04
Loss = 7.4444e-02, PNorm = 84.2661, GNorm = 0.6295, lr_0 = 1.5804e-04
Loss = 7.5917e-02, PNorm = 84.2694, GNorm = 0.6040, lr_0 = 1.5794e-04
Loss = 6.8664e-02, PNorm = 84.2724, GNorm = 0.5944, lr_0 = 1.5783e-04
Loss = 9.8816e-02, PNorm = 84.2735, GNorm = 0.5765, lr_0 = 1.5772e-04
Loss = 8.0232e-02, PNorm = 84.2769, GNorm = 0.5713, lr_0 = 1.5761e-04
Loss = 9.1322e-02, PNorm = 84.2799, GNorm = 0.5956, lr_0 = 1.5750e-04
Loss = 7.9205e-02, PNorm = 84.2827, GNorm = 0.6236, lr_0 = 1.5740e-04
Loss = 9.4835e-02, PNorm = 84.2867, GNorm = 0.5830, lr_0 = 1.5729e-04
Loss = 8.6257e-02, PNorm = 84.2908, GNorm = 0.8329, lr_0 = 1.5718e-04
Loss = 7.2866e-02, PNorm = 84.2912, GNorm = 0.6877, lr_0 = 1.5707e-04
Loss = 8.0049e-02, PNorm = 84.2928, GNorm = 0.4961, lr_0 = 1.5697e-04
Loss = 8.0907e-02, PNorm = 84.2958, GNorm = 0.6281, lr_0 = 1.5686e-04
Loss = 8.0182e-02, PNorm = 84.3003, GNorm = 0.8384, lr_0 = 1.5675e-04
Loss = 8.4467e-02, PNorm = 84.3042, GNorm = 0.6258, lr_0 = 1.5664e-04
Loss = 7.5818e-02, PNorm = 84.3070, GNorm = 0.5481, lr_0 = 1.5654e-04
Loss = 8.2400e-02, PNorm = 84.3094, GNorm = 0.6136, lr_0 = 1.5643e-04
Loss = 7.2385e-02, PNorm = 84.3116, GNorm = 0.5481, lr_0 = 1.5632e-04
Loss = 7.3463e-02, PNorm = 84.3138, GNorm = 0.6891, lr_0 = 1.5621e-04
Loss = 6.9671e-02, PNorm = 84.3151, GNorm = 0.5131, lr_0 = 1.5611e-04
Loss = 8.4662e-02, PNorm = 84.3156, GNorm = 0.9872, lr_0 = 1.5600e-04
Loss = 8.6749e-02, PNorm = 84.3188, GNorm = 0.5781, lr_0 = 1.5589e-04
Loss = 8.7956e-02, PNorm = 84.3207, GNorm = 0.5172, lr_0 = 1.5579e-04
Loss = 8.6255e-02, PNorm = 84.3236, GNorm = 0.6951, lr_0 = 1.5568e-04
Loss = 7.9363e-02, PNorm = 84.3273, GNorm = 0.6213, lr_0 = 1.5557e-04
Loss = 9.5716e-02, PNorm = 84.3287, GNorm = 0.7620, lr_0 = 1.5547e-04
Loss = 8.9901e-02, PNorm = 84.3311, GNorm = 0.8272, lr_0 = 1.5536e-04
Loss = 8.0462e-02, PNorm = 84.3335, GNorm = 0.5760, lr_0 = 1.5525e-04
Loss = 8.8342e-02, PNorm = 84.3341, GNorm = 0.6478, lr_0 = 1.5515e-04
Loss = 9.9188e-02, PNorm = 84.3384, GNorm = 0.6218, lr_0 = 1.5504e-04
Loss = 7.8184e-02, PNorm = 84.3414, GNorm = 0.6330, lr_0 = 1.5493e-04
Loss = 9.0730e-02, PNorm = 84.3432, GNorm = 0.6838, lr_0 = 1.5483e-04
Loss = 9.0567e-02, PNorm = 84.3451, GNorm = 0.6909, lr_0 = 1.5472e-04
Loss = 8.7572e-02, PNorm = 84.3467, GNorm = 0.7802, lr_0 = 1.5462e-04
Loss = 8.9027e-02, PNorm = 84.3480, GNorm = 0.7593, lr_0 = 1.5451e-04
Loss = 7.7895e-02, PNorm = 84.3500, GNorm = 0.6115, lr_0 = 1.5440e-04
Loss = 8.9763e-02, PNorm = 84.3524, GNorm = 0.5909, lr_0 = 1.5430e-04
Loss = 8.6584e-02, PNorm = 84.3541, GNorm = 0.6447, lr_0 = 1.5419e-04
Loss = 8.2150e-02, PNorm = 84.3556, GNorm = 0.5137, lr_0 = 1.5409e-04
Loss = 8.3823e-02, PNorm = 84.3583, GNorm = 0.7513, lr_0 = 1.5398e-04
Loss = 8.4471e-02, PNorm = 84.3605, GNorm = 0.8978, lr_0 = 1.5388e-04
Loss = 8.0786e-02, PNorm = 84.3620, GNorm = 0.8563, lr_0 = 1.5377e-04
Loss = 8.2001e-02, PNorm = 84.3636, GNorm = 0.7427, lr_0 = 1.5367e-04
Loss = 9.0366e-02, PNorm = 84.3670, GNorm = 0.6804, lr_0 = 1.5356e-04
Loss = 8.8698e-02, PNorm = 84.3706, GNorm = 0.6192, lr_0 = 1.5346e-04
Loss = 7.8731e-02, PNorm = 84.3729, GNorm = 0.4964, lr_0 = 1.5335e-04
Loss = 7.5722e-02, PNorm = 84.3735, GNorm = 0.5377, lr_0 = 1.5325e-04
Loss = 8.3157e-02, PNorm = 84.3736, GNorm = 0.6257, lr_0 = 1.5314e-04
Loss = 8.9646e-02, PNorm = 84.3751, GNorm = 0.7444, lr_0 = 1.5304e-04
Loss = 7.6538e-02, PNorm = 84.3778, GNorm = 0.5780, lr_0 = 1.5293e-04
Loss = 8.6926e-02, PNorm = 84.3791, GNorm = 0.5785, lr_0 = 1.5283e-04
Loss = 8.3566e-02, PNorm = 84.3834, GNorm = 0.5885, lr_0 = 1.5272e-04
Loss = 8.0607e-02, PNorm = 84.3861, GNorm = 0.6120, lr_0 = 1.5262e-04
Loss = 8.2349e-02, PNorm = 84.3882, GNorm = 0.6441, lr_0 = 1.5251e-04
Loss = 8.8504e-02, PNorm = 84.3907, GNorm = 0.5092, lr_0 = 1.5241e-04
Loss = 7.7577e-02, PNorm = 84.3919, GNorm = 0.6699, lr_0 = 1.5230e-04
Loss = 8.9540e-02, PNorm = 84.3935, GNorm = 0.7085, lr_0 = 1.5220e-04
Loss = 8.4072e-02, PNorm = 84.3941, GNorm = 0.5943, lr_0 = 1.5209e-04
Loss = 8.5854e-02, PNorm = 84.3973, GNorm = 0.7491, lr_0 = 1.5199e-04
Loss = 7.9583e-02, PNorm = 84.3998, GNorm = 0.5972, lr_0 = 1.5189e-04
Loss = 8.7844e-02, PNorm = 84.4037, GNorm = 0.5783, lr_0 = 1.5178e-04
Loss = 7.7366e-02, PNorm = 84.4062, GNorm = 0.4755, lr_0 = 1.5168e-04
Loss = 8.2410e-02, PNorm = 84.4081, GNorm = 0.5743, lr_0 = 1.5157e-04
Loss = 7.8491e-02, PNorm = 84.4106, GNorm = 0.5181, lr_0 = 1.5147e-04
Loss = 7.7492e-02, PNorm = 84.4115, GNorm = 0.8029, lr_0 = 1.5137e-04
Loss = 7.8122e-02, PNorm = 84.4146, GNorm = 0.4684, lr_0 = 1.5126e-04
Loss = 8.9943e-02, PNorm = 84.4175, GNorm = 0.6414, lr_0 = 1.5116e-04
Loss = 8.0703e-02, PNorm = 84.4193, GNorm = 0.6338, lr_0 = 1.5106e-04
Loss = 1.0713e-01, PNorm = 84.4205, GNorm = 0.6680, lr_0 = 1.5095e-04
Loss = 9.1934e-02, PNorm = 84.4230, GNorm = 0.6240, lr_0 = 1.5085e-04
Validation mae = 0.229557
Epoch 25
Loss = 8.5093e-02, PNorm = 84.4256, GNorm = 0.6432, lr_0 = 1.5075e-04
Loss = 8.8260e-02, PNorm = 84.4295, GNorm = 0.6189, lr_0 = 1.5064e-04
Loss = 7.7027e-02, PNorm = 84.4325, GNorm = 0.6737, lr_0 = 1.5054e-04
Loss = 7.4824e-02, PNorm = 84.4347, GNorm = 0.5294, lr_0 = 1.5044e-04
Loss = 7.8811e-02, PNorm = 84.4371, GNorm = 0.6494, lr_0 = 1.5033e-04
Loss = 8.2712e-02, PNorm = 84.4381, GNorm = 0.5714, lr_0 = 1.5023e-04
Loss = 8.8522e-02, PNorm = 84.4395, GNorm = 0.6499, lr_0 = 1.5013e-04
Loss = 8.6589e-02, PNorm = 84.4433, GNorm = 0.6100, lr_0 = 1.5002e-04
Loss = 7.8059e-02, PNorm = 84.4471, GNorm = 0.5656, lr_0 = 1.4992e-04
Loss = 6.9831e-02, PNorm = 84.4493, GNorm = 0.6025, lr_0 = 1.4982e-04
Loss = 6.6733e-02, PNorm = 84.4510, GNorm = 0.5824, lr_0 = 1.4972e-04
Loss = 8.4716e-02, PNorm = 84.4549, GNorm = 0.6159, lr_0 = 1.4961e-04
Loss = 8.2598e-02, PNorm = 84.4558, GNorm = 0.5315, lr_0 = 1.4951e-04
Loss = 8.7785e-02, PNorm = 84.4589, GNorm = 0.5640, lr_0 = 1.4941e-04
Loss = 8.1597e-02, PNorm = 84.4637, GNorm = 0.4741, lr_0 = 1.4931e-04
Loss = 7.8151e-02, PNorm = 84.4677, GNorm = 0.6206, lr_0 = 1.4920e-04
Loss = 7.3915e-02, PNorm = 84.4707, GNorm = 0.6046, lr_0 = 1.4910e-04
Loss = 7.0431e-02, PNorm = 84.4751, GNorm = 0.6146, lr_0 = 1.4900e-04
Loss = 7.7224e-02, PNorm = 84.4767, GNorm = 0.6864, lr_0 = 1.4890e-04
Loss = 7.6411e-02, PNorm = 84.4761, GNorm = 0.5253, lr_0 = 1.4880e-04
Loss = 7.3326e-02, PNorm = 84.4781, GNorm = 0.5447, lr_0 = 1.4869e-04
Loss = 6.5743e-02, PNorm = 84.4815, GNorm = 0.5119, lr_0 = 1.4859e-04
Loss = 8.4779e-02, PNorm = 84.4851, GNorm = 0.6217, lr_0 = 1.4849e-04
Loss = 7.6154e-02, PNorm = 84.4902, GNorm = 0.5686, lr_0 = 1.4839e-04
Loss = 8.1830e-02, PNorm = 84.4941, GNorm = 0.5712, lr_0 = 1.4829e-04
Loss = 6.8468e-02, PNorm = 84.4974, GNorm = 0.6057, lr_0 = 1.4818e-04
Loss = 7.8106e-02, PNorm = 84.5004, GNorm = 0.6109, lr_0 = 1.4808e-04
Loss = 8.3207e-02, PNorm = 84.5008, GNorm = 0.7140, lr_0 = 1.4798e-04
Loss = 8.1230e-02, PNorm = 84.5029, GNorm = 0.8313, lr_0 = 1.4788e-04
Loss = 7.8254e-02, PNorm = 84.5043, GNorm = 0.5380, lr_0 = 1.4778e-04
Loss = 7.1770e-02, PNorm = 84.5058, GNorm = 0.7422, lr_0 = 1.4768e-04
Loss = 7.8707e-02, PNorm = 84.5065, GNorm = 0.5737, lr_0 = 1.4758e-04
Loss = 7.5737e-02, PNorm = 84.5091, GNorm = 0.5985, lr_0 = 1.4748e-04
Loss = 1.0177e-01, PNorm = 84.5135, GNorm = 0.7336, lr_0 = 1.4737e-04
Loss = 8.4654e-02, PNorm = 84.5145, GNorm = 0.5624, lr_0 = 1.4727e-04
Loss = 8.2419e-02, PNorm = 84.5151, GNorm = 0.5717, lr_0 = 1.4717e-04
Loss = 8.3415e-02, PNorm = 84.5186, GNorm = 0.6546, lr_0 = 1.4707e-04
Loss = 9.4875e-02, PNorm = 84.5205, GNorm = 0.6879, lr_0 = 1.4697e-04
Loss = 8.0010e-02, PNorm = 84.5214, GNorm = 0.8412, lr_0 = 1.4687e-04
Loss = 7.7882e-02, PNorm = 84.5228, GNorm = 0.7352, lr_0 = 1.4677e-04
Loss = 9.1525e-02, PNorm = 84.5269, GNorm = 0.7462, lr_0 = 1.4667e-04
Loss = 8.1622e-02, PNorm = 84.5309, GNorm = 0.8872, lr_0 = 1.4657e-04
Loss = 7.7411e-02, PNorm = 84.5310, GNorm = 0.4556, lr_0 = 1.4647e-04
Loss = 6.9137e-02, PNorm = 84.5324, GNorm = 0.5587, lr_0 = 1.4637e-04
Loss = 9.0942e-02, PNorm = 84.5350, GNorm = 0.8184, lr_0 = 1.4627e-04
Loss = 7.8142e-02, PNorm = 84.5367, GNorm = 0.5866, lr_0 = 1.4617e-04
Loss = 8.8284e-02, PNorm = 84.5380, GNorm = 0.6594, lr_0 = 1.4607e-04
Loss = 7.5343e-02, PNorm = 84.5428, GNorm = 0.8199, lr_0 = 1.4597e-04
Loss = 7.5636e-02, PNorm = 84.5439, GNorm = 0.4958, lr_0 = 1.4587e-04
Loss = 8.9775e-02, PNorm = 84.5454, GNorm = 0.5696, lr_0 = 1.4577e-04
Loss = 7.6109e-02, PNorm = 84.5460, GNorm = 0.5449, lr_0 = 1.4567e-04
Loss = 7.8908e-02, PNorm = 84.5463, GNorm = 0.6325, lr_0 = 1.4557e-04
Loss = 8.4056e-02, PNorm = 84.5483, GNorm = 0.7127, lr_0 = 1.4547e-04
Loss = 7.8986e-02, PNorm = 84.5502, GNorm = 0.5295, lr_0 = 1.4537e-04
Loss = 8.9609e-02, PNorm = 84.5514, GNorm = 0.5736, lr_0 = 1.4527e-04
Loss = 8.1940e-02, PNorm = 84.5545, GNorm = 0.5561, lr_0 = 1.4517e-04
Loss = 7.9263e-02, PNorm = 84.5559, GNorm = 0.8943, lr_0 = 1.4507e-04
Loss = 8.0882e-02, PNorm = 84.5606, GNorm = 0.5909, lr_0 = 1.4497e-04
Loss = 8.2267e-02, PNorm = 84.5625, GNorm = 0.6152, lr_0 = 1.4487e-04
Loss = 7.6580e-02, PNorm = 84.5649, GNorm = 0.7558, lr_0 = 1.4477e-04
Loss = 8.8952e-02, PNorm = 84.5687, GNorm = 0.7455, lr_0 = 1.4467e-04
Loss = 7.8867e-02, PNorm = 84.5711, GNorm = 0.7153, lr_0 = 1.4457e-04
Loss = 8.4977e-02, PNorm = 84.5725, GNorm = 0.6874, lr_0 = 1.4447e-04
Loss = 8.7320e-02, PNorm = 84.5726, GNorm = 0.5632, lr_0 = 1.4438e-04
Loss = 9.4113e-02, PNorm = 84.5741, GNorm = 0.8769, lr_0 = 1.4428e-04
Loss = 7.5915e-02, PNorm = 84.5771, GNorm = 0.5124, lr_0 = 1.4418e-04
Loss = 7.6011e-02, PNorm = 84.5792, GNorm = 0.6263, lr_0 = 1.4408e-04
Loss = 7.2528e-02, PNorm = 84.5814, GNorm = 0.5589, lr_0 = 1.4398e-04
Loss = 8.8615e-02, PNorm = 84.5844, GNorm = 0.7135, lr_0 = 1.4388e-04
Loss = 7.6212e-02, PNorm = 84.5873, GNorm = 0.6878, lr_0 = 1.4378e-04
Loss = 8.0003e-02, PNorm = 84.5889, GNorm = 0.6905, lr_0 = 1.4368e-04
Loss = 8.2512e-02, PNorm = 84.5908, GNorm = 0.7091, lr_0 = 1.4359e-04
Loss = 8.8043e-02, PNorm = 84.5927, GNorm = 0.5769, lr_0 = 1.4349e-04
Loss = 7.3972e-02, PNorm = 84.5963, GNorm = 0.7080, lr_0 = 1.4339e-04
Loss = 8.9911e-02, PNorm = 84.5978, GNorm = 0.6754, lr_0 = 1.4329e-04
Loss = 9.1353e-02, PNorm = 84.6011, GNorm = 0.5609, lr_0 = 1.4319e-04
Loss = 7.8075e-02, PNorm = 84.6034, GNorm = 0.6499, lr_0 = 1.4310e-04
Loss = 8.5718e-02, PNorm = 84.6055, GNorm = 1.0815, lr_0 = 1.4300e-04
Loss = 9.2186e-02, PNorm = 84.6068, GNorm = 0.6427, lr_0 = 1.4290e-04
Loss = 7.9006e-02, PNorm = 84.6085, GNorm = 0.5042, lr_0 = 1.4280e-04
Loss = 9.3005e-02, PNorm = 84.6105, GNorm = 0.6044, lr_0 = 1.4270e-04
Loss = 8.5396e-02, PNorm = 84.6130, GNorm = 0.5489, lr_0 = 1.4261e-04
Loss = 8.5240e-02, PNorm = 84.6173, GNorm = 0.6520, lr_0 = 1.4251e-04
Loss = 7.9646e-02, PNorm = 84.6201, GNorm = 0.6414, lr_0 = 1.4241e-04
Loss = 8.8872e-02, PNorm = 84.6216, GNorm = 0.8743, lr_0 = 1.4231e-04
Loss = 8.1566e-02, PNorm = 84.6225, GNorm = 0.6439, lr_0 = 1.4222e-04
Loss = 8.3701e-02, PNorm = 84.6253, GNorm = 0.7222, lr_0 = 1.4212e-04
Loss = 7.6620e-02, PNorm = 84.6273, GNorm = 0.5351, lr_0 = 1.4202e-04
Loss = 7.9540e-02, PNorm = 84.6305, GNorm = 0.6076, lr_0 = 1.4192e-04
Loss = 7.8232e-02, PNorm = 84.6324, GNorm = 0.6027, lr_0 = 1.4183e-04
Loss = 8.1265e-02, PNorm = 84.6333, GNorm = 0.5489, lr_0 = 1.4173e-04
Loss = 7.8459e-02, PNorm = 84.6363, GNorm = 0.7496, lr_0 = 1.4163e-04
Loss = 7.9838e-02, PNorm = 84.6389, GNorm = 0.6205, lr_0 = 1.4153e-04
Loss = 8.7672e-02, PNorm = 84.6409, GNorm = 0.7976, lr_0 = 1.4144e-04
Loss = 8.2031e-02, PNorm = 84.6418, GNorm = 0.7604, lr_0 = 1.4134e-04
Loss = 8.5344e-02, PNorm = 84.6439, GNorm = 0.8606, lr_0 = 1.4124e-04
Loss = 7.7586e-02, PNorm = 84.6469, GNorm = 1.2503, lr_0 = 1.4115e-04
Loss = 8.9024e-02, PNorm = 84.6495, GNorm = 0.6303, lr_0 = 1.4105e-04
Loss = 7.3952e-02, PNorm = 84.6516, GNorm = 0.6830, lr_0 = 1.4095e-04
Loss = 9.1282e-02, PNorm = 84.6534, GNorm = 0.5753, lr_0 = 1.4086e-04
Loss = 8.5406e-02, PNorm = 84.6549, GNorm = 0.6486, lr_0 = 1.4076e-04
Loss = 7.8641e-02, PNorm = 84.6569, GNorm = 0.5667, lr_0 = 1.4066e-04
Loss = 9.4629e-02, PNorm = 84.6587, GNorm = 0.4636, lr_0 = 1.4057e-04
Loss = 7.2723e-02, PNorm = 84.6602, GNorm = 0.5605, lr_0 = 1.4047e-04
Loss = 7.6078e-02, PNorm = 84.6608, GNorm = 0.5090, lr_0 = 1.4038e-04
Loss = 7.3974e-02, PNorm = 84.6620, GNorm = 0.5650, lr_0 = 1.4028e-04
Loss = 8.5371e-02, PNorm = 84.6639, GNorm = 0.6479, lr_0 = 1.4018e-04
Loss = 9.0982e-02, PNorm = 84.6659, GNorm = 0.8820, lr_0 = 1.4009e-04
Loss = 9.1664e-02, PNorm = 84.6679, GNorm = 0.6441, lr_0 = 1.3999e-04
Loss = 8.4784e-02, PNorm = 84.6692, GNorm = 0.5414, lr_0 = 1.3990e-04
Loss = 8.1550e-02, PNorm = 84.6722, GNorm = 0.5501, lr_0 = 1.3980e-04
Loss = 7.8063e-02, PNorm = 84.6747, GNorm = 0.6614, lr_0 = 1.3970e-04
Loss = 7.5677e-02, PNorm = 84.6755, GNorm = 0.4971, lr_0 = 1.3961e-04
Loss = 8.2326e-02, PNorm = 84.6778, GNorm = 0.6848, lr_0 = 1.3951e-04
Loss = 7.9455e-02, PNorm = 84.6819, GNorm = 0.6900, lr_0 = 1.3942e-04
Loss = 8.2427e-02, PNorm = 84.6847, GNorm = 0.5401, lr_0 = 1.3932e-04
Loss = 8.6732e-02, PNorm = 84.6875, GNorm = 0.5124, lr_0 = 1.3923e-04
Loss = 8.3236e-02, PNorm = 84.6899, GNorm = 0.6384, lr_0 = 1.3913e-04
Loss = 8.4089e-02, PNorm = 84.6909, GNorm = 0.5743, lr_0 = 1.3904e-04
Loss = 8.8855e-02, PNorm = 84.6928, GNorm = 0.8231, lr_0 = 1.3894e-04
Validation mae = 0.226302
Epoch 26
Loss = 8.2393e-02, PNorm = 84.6969, GNorm = 0.4493, lr_0 = 1.3884e-04
Loss = 8.6598e-02, PNorm = 84.6996, GNorm = 0.5421, lr_0 = 1.3875e-04
Loss = 7.1887e-02, PNorm = 84.7006, GNorm = 0.5602, lr_0 = 1.3865e-04
Loss = 8.4613e-02, PNorm = 84.7020, GNorm = 0.5605, lr_0 = 1.3856e-04
Loss = 8.1637e-02, PNorm = 84.7055, GNorm = 0.5706, lr_0 = 1.3846e-04
Loss = 9.4940e-02, PNorm = 84.7091, GNorm = 0.7401, lr_0 = 1.3837e-04
Loss = 7.3827e-02, PNorm = 84.7123, GNorm = 0.7867, lr_0 = 1.3828e-04
Loss = 8.5026e-02, PNorm = 84.7150, GNorm = 0.5589, lr_0 = 1.3818e-04
Loss = 7.2709e-02, PNorm = 84.7181, GNorm = 0.8365, lr_0 = 1.3809e-04
Loss = 8.0753e-02, PNorm = 84.7191, GNorm = 0.9126, lr_0 = 1.3799e-04
Loss = 7.8588e-02, PNorm = 84.7217, GNorm = 0.9281, lr_0 = 1.3790e-04
Loss = 7.4655e-02, PNorm = 84.7241, GNorm = 0.7251, lr_0 = 1.3780e-04
Loss = 7.6752e-02, PNorm = 84.7256, GNorm = 0.7897, lr_0 = 1.3771e-04
Loss = 8.5841e-02, PNorm = 84.7271, GNorm = 0.8354, lr_0 = 1.3761e-04
Loss = 8.5275e-02, PNorm = 84.7290, GNorm = 0.5809, lr_0 = 1.3752e-04
Loss = 8.9161e-02, PNorm = 84.7295, GNorm = 0.8236, lr_0 = 1.3742e-04
Loss = 7.7962e-02, PNorm = 84.7307, GNorm = 0.6528, lr_0 = 1.3733e-04
Loss = 7.5910e-02, PNorm = 84.7333, GNorm = 0.7113, lr_0 = 1.3724e-04
Loss = 8.3003e-02, PNorm = 84.7352, GNorm = 0.7582, lr_0 = 1.3714e-04
Loss = 7.2218e-02, PNorm = 84.7356, GNorm = 0.6362, lr_0 = 1.3705e-04
Loss = 7.9468e-02, PNorm = 84.7376, GNorm = 0.6988, lr_0 = 1.3695e-04
Loss = 7.4824e-02, PNorm = 84.7410, GNorm = 0.6297, lr_0 = 1.3686e-04
Loss = 8.0498e-02, PNorm = 84.7441, GNorm = 0.5672, lr_0 = 1.3677e-04
Loss = 8.8116e-02, PNorm = 84.7445, GNorm = 0.7302, lr_0 = 1.3667e-04
Loss = 7.5563e-02, PNorm = 84.7456, GNorm = 0.6031, lr_0 = 1.3658e-04
Loss = 7.6796e-02, PNorm = 84.7486, GNorm = 0.7762, lr_0 = 1.3649e-04
Loss = 8.8171e-02, PNorm = 84.7501, GNorm = 0.6732, lr_0 = 1.3639e-04
Loss = 7.3883e-02, PNorm = 84.7534, GNorm = 0.6210, lr_0 = 1.3630e-04
Loss = 8.1709e-02, PNorm = 84.7568, GNorm = 0.6002, lr_0 = 1.3621e-04
Loss = 7.5221e-02, PNorm = 84.7584, GNorm = 0.4706, lr_0 = 1.3611e-04
Loss = 7.8255e-02, PNorm = 84.7609, GNorm = 0.7230, lr_0 = 1.3602e-04
Loss = 8.7176e-02, PNorm = 84.7630, GNorm = 0.7287, lr_0 = 1.3593e-04
Loss = 8.0726e-02, PNorm = 84.7653, GNorm = 0.6626, lr_0 = 1.3583e-04
Loss = 9.8840e-02, PNorm = 84.7688, GNorm = 0.7668, lr_0 = 1.3574e-04
Loss = 7.4976e-02, PNorm = 84.7708, GNorm = 0.5442, lr_0 = 1.3565e-04
Loss = 7.6640e-02, PNorm = 84.7731, GNorm = 0.6191, lr_0 = 1.3555e-04
Loss = 6.9137e-02, PNorm = 84.7765, GNorm = 0.5855, lr_0 = 1.3546e-04
Loss = 8.1929e-02, PNorm = 84.7788, GNorm = 0.6465, lr_0 = 1.3537e-04
Loss = 7.3662e-02, PNorm = 84.7817, GNorm = 0.8865, lr_0 = 1.3528e-04
Loss = 8.0821e-02, PNorm = 84.7840, GNorm = 0.7073, lr_0 = 1.3518e-04
Loss = 7.6628e-02, PNorm = 84.7861, GNorm = 0.6045, lr_0 = 1.3509e-04
Loss = 7.1894e-02, PNorm = 84.7886, GNorm = 0.6432, lr_0 = 1.3500e-04
Loss = 7.3942e-02, PNorm = 84.7889, GNorm = 0.5745, lr_0 = 1.3491e-04
Loss = 7.1288e-02, PNorm = 84.7892, GNorm = 0.5958, lr_0 = 1.3481e-04
Loss = 7.5598e-02, PNorm = 84.7923, GNorm = 0.5855, lr_0 = 1.3472e-04
Loss = 7.4828e-02, PNorm = 84.7953, GNorm = 0.5180, lr_0 = 1.3463e-04
Loss = 7.7527e-02, PNorm = 84.7969, GNorm = 0.7058, lr_0 = 1.3454e-04
Loss = 8.0021e-02, PNorm = 84.7987, GNorm = 0.4529, lr_0 = 1.3444e-04
Loss = 7.7224e-02, PNorm = 84.7999, GNorm = 0.8047, lr_0 = 1.3435e-04
Loss = 8.3131e-02, PNorm = 84.8012, GNorm = 0.6834, lr_0 = 1.3426e-04
Loss = 7.8463e-02, PNorm = 84.8024, GNorm = 0.5842, lr_0 = 1.3417e-04
Loss = 7.4977e-02, PNorm = 84.8036, GNorm = 0.6235, lr_0 = 1.3408e-04
Loss = 7.9303e-02, PNorm = 84.8045, GNorm = 0.8597, lr_0 = 1.3398e-04
Loss = 7.7352e-02, PNorm = 84.8052, GNorm = 0.6551, lr_0 = 1.3389e-04
Loss = 7.9970e-02, PNorm = 84.8060, GNorm = 0.9801, lr_0 = 1.3380e-04
Loss = 7.3694e-02, PNorm = 84.8074, GNorm = 0.6635, lr_0 = 1.3371e-04
Loss = 7.7909e-02, PNorm = 84.8100, GNorm = 0.6055, lr_0 = 1.3362e-04
Loss = 6.8850e-02, PNorm = 84.8123, GNorm = 0.5640, lr_0 = 1.3353e-04
Loss = 8.4636e-02, PNorm = 84.8132, GNorm = 0.9495, lr_0 = 1.3343e-04
Loss = 9.2232e-02, PNorm = 84.8152, GNorm = 0.8841, lr_0 = 1.3334e-04
Loss = 7.5192e-02, PNorm = 84.8181, GNorm = 0.5912, lr_0 = 1.3325e-04
Loss = 7.5460e-02, PNorm = 84.8204, GNorm = 0.4632, lr_0 = 1.3316e-04
Loss = 8.5597e-02, PNorm = 84.8234, GNorm = 0.5524, lr_0 = 1.3307e-04
Loss = 7.7908e-02, PNorm = 84.8266, GNorm = 0.6711, lr_0 = 1.3298e-04
Loss = 7.4277e-02, PNorm = 84.8289, GNorm = 0.5291, lr_0 = 1.3289e-04
Loss = 9.1571e-02, PNorm = 84.8302, GNorm = 0.6562, lr_0 = 1.3280e-04
Loss = 8.1447e-02, PNorm = 84.8333, GNorm = 0.6024, lr_0 = 1.3270e-04
Loss = 7.7089e-02, PNorm = 84.8370, GNorm = 0.4712, lr_0 = 1.3261e-04
Loss = 8.4442e-02, PNorm = 84.8387, GNorm = 0.9854, lr_0 = 1.3252e-04
Loss = 7.8659e-02, PNorm = 84.8393, GNorm = 0.5511, lr_0 = 1.3243e-04
Loss = 8.0995e-02, PNorm = 84.8427, GNorm = 0.9500, lr_0 = 1.3234e-04
Loss = 7.3877e-02, PNorm = 84.8467, GNorm = 0.6910, lr_0 = 1.3225e-04
Loss = 7.3527e-02, PNorm = 84.8474, GNorm = 0.8341, lr_0 = 1.3216e-04
Loss = 7.8349e-02, PNorm = 84.8499, GNorm = 0.6178, lr_0 = 1.3207e-04
Loss = 7.6424e-02, PNorm = 84.8510, GNorm = 0.6454, lr_0 = 1.3198e-04
Loss = 7.0236e-02, PNorm = 84.8513, GNorm = 0.6491, lr_0 = 1.3189e-04
Loss = 7.9332e-02, PNorm = 84.8536, GNorm = 0.7794, lr_0 = 1.3180e-04
Loss = 7.8363e-02, PNorm = 84.8568, GNorm = 0.5268, lr_0 = 1.3171e-04
Loss = 8.7533e-02, PNorm = 84.8585, GNorm = 0.6330, lr_0 = 1.3162e-04
Loss = 7.6913e-02, PNorm = 84.8586, GNorm = 0.7445, lr_0 = 1.3153e-04
Loss = 7.0772e-02, PNorm = 84.8608, GNorm = 0.8281, lr_0 = 1.3144e-04
Loss = 7.7697e-02, PNorm = 84.8626, GNorm = 0.5114, lr_0 = 1.3135e-04
Loss = 8.1986e-02, PNorm = 84.8642, GNorm = 0.8373, lr_0 = 1.3126e-04
Loss = 7.5684e-02, PNorm = 84.8672, GNorm = 0.4872, lr_0 = 1.3117e-04
Loss = 8.3123e-02, PNorm = 84.8703, GNorm = 0.6433, lr_0 = 1.3108e-04
Loss = 7.8107e-02, PNorm = 84.8721, GNorm = 0.6842, lr_0 = 1.3099e-04
Loss = 7.7929e-02, PNorm = 84.8735, GNorm = 0.6651, lr_0 = 1.3090e-04
Loss = 8.3793e-02, PNorm = 84.8759, GNorm = 0.6718, lr_0 = 1.3081e-04
Loss = 8.2778e-02, PNorm = 84.8774, GNorm = 1.0683, lr_0 = 1.3072e-04
Loss = 7.0092e-02, PNorm = 84.8799, GNorm = 0.5367, lr_0 = 1.3063e-04
Loss = 7.4283e-02, PNorm = 84.8825, GNorm = 0.4644, lr_0 = 1.3054e-04
Loss = 7.9332e-02, PNorm = 84.8848, GNorm = 0.5816, lr_0 = 1.3045e-04
Loss = 8.3080e-02, PNorm = 84.8870, GNorm = 0.8001, lr_0 = 1.3036e-04
Loss = 8.6861e-02, PNorm = 84.8898, GNorm = 0.6132, lr_0 = 1.3027e-04
Loss = 8.9742e-02, PNorm = 84.8927, GNorm = 0.7264, lr_0 = 1.3018e-04
Loss = 8.6826e-02, PNorm = 84.8931, GNorm = 0.4539, lr_0 = 1.3009e-04
Loss = 8.2130e-02, PNorm = 84.8953, GNorm = 0.6132, lr_0 = 1.3000e-04
Loss = 6.5826e-02, PNorm = 84.8996, GNorm = 0.5562, lr_0 = 1.2992e-04
Loss = 8.7221e-02, PNorm = 84.9027, GNorm = 0.6360, lr_0 = 1.2983e-04
Loss = 7.1655e-02, PNorm = 84.9043, GNorm = 0.5729, lr_0 = 1.2974e-04
Loss = 6.7324e-02, PNorm = 84.9054, GNorm = 0.6750, lr_0 = 1.2965e-04
Loss = 7.3608e-02, PNorm = 84.9076, GNorm = 0.6216, lr_0 = 1.2956e-04
Loss = 7.3497e-02, PNorm = 84.9091, GNorm = 0.6966, lr_0 = 1.2947e-04
Loss = 7.9766e-02, PNorm = 84.9104, GNorm = 1.0187, lr_0 = 1.2938e-04
Loss = 8.1301e-02, PNorm = 84.9105, GNorm = 0.6834, lr_0 = 1.2929e-04
Loss = 9.6429e-02, PNorm = 84.9123, GNorm = 0.5213, lr_0 = 1.2921e-04
Loss = 7.9513e-02, PNorm = 84.9148, GNorm = 0.7899, lr_0 = 1.2912e-04
Loss = 8.8243e-02, PNorm = 84.9166, GNorm = 0.3969, lr_0 = 1.2903e-04
Loss = 8.3524e-02, PNorm = 84.9182, GNorm = 0.7047, lr_0 = 1.2894e-04
Loss = 8.8325e-02, PNorm = 84.9206, GNorm = 0.6298, lr_0 = 1.2885e-04
Loss = 7.9047e-02, PNorm = 84.9217, GNorm = 0.6050, lr_0 = 1.2876e-04
Loss = 7.5997e-02, PNorm = 84.9223, GNorm = 0.4830, lr_0 = 1.2867e-04
Loss = 7.9842e-02, PNorm = 84.9226, GNorm = 0.5441, lr_0 = 1.2859e-04
Loss = 8.1990e-02, PNorm = 84.9249, GNorm = 0.5377, lr_0 = 1.2850e-04
Loss = 8.5388e-02, PNorm = 84.9300, GNorm = 0.5059, lr_0 = 1.2841e-04
Loss = 8.2996e-02, PNorm = 84.9331, GNorm = 0.5025, lr_0 = 1.2832e-04
Loss = 8.1723e-02, PNorm = 84.9344, GNorm = 0.6068, lr_0 = 1.2823e-04
Loss = 8.4920e-02, PNorm = 84.9370, GNorm = 0.5286, lr_0 = 1.2815e-04
Loss = 8.6071e-02, PNorm = 84.9383, GNorm = 0.6499, lr_0 = 1.2806e-04
Loss = 7.6695e-02, PNorm = 84.9403, GNorm = 0.6669, lr_0 = 1.2797e-04
Validation mae = 0.227458
Epoch 27
Loss = 7.1590e-02, PNorm = 84.9419, GNorm = 0.5913, lr_0 = 1.2788e-04
Loss = 7.3949e-02, PNorm = 84.9452, GNorm = 0.6234, lr_0 = 1.2780e-04
Loss = 7.8379e-02, PNorm = 84.9475, GNorm = 0.7180, lr_0 = 1.2771e-04
Loss = 8.4213e-02, PNorm = 84.9484, GNorm = 0.5323, lr_0 = 1.2762e-04
Loss = 7.2618e-02, PNorm = 84.9493, GNorm = 0.5011, lr_0 = 1.2753e-04
Loss = 6.7111e-02, PNorm = 84.9511, GNorm = 0.4910, lr_0 = 1.2745e-04
Loss = 7.7135e-02, PNorm = 84.9536, GNorm = 0.5700, lr_0 = 1.2736e-04
Loss = 8.2346e-02, PNorm = 84.9565, GNorm = 0.7756, lr_0 = 1.2727e-04
Loss = 8.5262e-02, PNorm = 84.9590, GNorm = 0.4537, lr_0 = 1.2718e-04
Loss = 7.1731e-02, PNorm = 84.9611, GNorm = 0.5710, lr_0 = 1.2710e-04
Loss = 6.8542e-02, PNorm = 84.9632, GNorm = 0.7399, lr_0 = 1.2701e-04
Loss = 7.7156e-02, PNorm = 84.9653, GNorm = 0.7231, lr_0 = 1.2692e-04
Loss = 8.3791e-02, PNorm = 84.9675, GNorm = 0.6139, lr_0 = 1.2684e-04
Loss = 7.8757e-02, PNorm = 84.9677, GNorm = 0.6822, lr_0 = 1.2675e-04
Loss = 7.3365e-02, PNorm = 84.9694, GNorm = 0.6955, lr_0 = 1.2666e-04
Loss = 7.0811e-02, PNorm = 84.9721, GNorm = 0.6187, lr_0 = 1.2658e-04
Loss = 8.1870e-02, PNorm = 84.9749, GNorm = 0.7613, lr_0 = 1.2649e-04
Loss = 6.8206e-02, PNorm = 84.9756, GNorm = 0.8054, lr_0 = 1.2640e-04
Loss = 7.8204e-02, PNorm = 84.9767, GNorm = 0.5340, lr_0 = 1.2632e-04
Loss = 7.9409e-02, PNorm = 84.9790, GNorm = 0.5782, lr_0 = 1.2623e-04
Loss = 7.3073e-02, PNorm = 84.9799, GNorm = 0.6985, lr_0 = 1.2614e-04
Loss = 7.6719e-02, PNorm = 84.9802, GNorm = 0.4953, lr_0 = 1.2606e-04
Loss = 7.3598e-02, PNorm = 84.9827, GNorm = 0.5690, lr_0 = 1.2597e-04
Loss = 7.8029e-02, PNorm = 84.9848, GNorm = 0.6856, lr_0 = 1.2588e-04
Loss = 7.2169e-02, PNorm = 84.9866, GNorm = 0.5471, lr_0 = 1.2580e-04
Loss = 7.6554e-02, PNorm = 84.9877, GNorm = 0.8360, lr_0 = 1.2571e-04
Loss = 7.6824e-02, PNorm = 84.9887, GNorm = 0.7156, lr_0 = 1.2563e-04
Loss = 7.8745e-02, PNorm = 84.9898, GNorm = 0.5205, lr_0 = 1.2554e-04
Loss = 8.0740e-02, PNorm = 84.9920, GNorm = 0.5538, lr_0 = 1.2545e-04
Loss = 7.4423e-02, PNorm = 84.9935, GNorm = 0.5282, lr_0 = 1.2537e-04
Loss = 8.2186e-02, PNorm = 84.9956, GNorm = 0.6655, lr_0 = 1.2528e-04
Loss = 7.4579e-02, PNorm = 84.9974, GNorm = 0.5570, lr_0 = 1.2520e-04
Loss = 6.7564e-02, PNorm = 84.9990, GNorm = 0.6389, lr_0 = 1.2511e-04
Loss = 8.0106e-02, PNorm = 85.0007, GNorm = 0.4368, lr_0 = 1.2502e-04
Loss = 8.5311e-02, PNorm = 85.0025, GNorm = 0.5582, lr_0 = 1.2494e-04
Loss = 7.6794e-02, PNorm = 85.0034, GNorm = 0.5008, lr_0 = 1.2485e-04
Loss = 8.5379e-02, PNorm = 85.0039, GNorm = 0.9439, lr_0 = 1.2477e-04
Loss = 7.4437e-02, PNorm = 85.0063, GNorm = 0.7346, lr_0 = 1.2468e-04
Loss = 8.1070e-02, PNorm = 85.0084, GNorm = 0.7482, lr_0 = 1.2460e-04
Loss = 7.1897e-02, PNorm = 85.0095, GNorm = 0.6579, lr_0 = 1.2451e-04
Loss = 6.6068e-02, PNorm = 85.0115, GNorm = 0.3754, lr_0 = 1.2443e-04
Loss = 7.0938e-02, PNorm = 85.0133, GNorm = 0.4916, lr_0 = 1.2434e-04
Loss = 8.3168e-02, PNorm = 85.0149, GNorm = 0.9910, lr_0 = 1.2426e-04
Loss = 8.3054e-02, PNorm = 85.0177, GNorm = 0.5989, lr_0 = 1.2417e-04
Loss = 7.0408e-02, PNorm = 85.0215, GNorm = 0.6237, lr_0 = 1.2409e-04
Loss = 8.4171e-02, PNorm = 85.0237, GNorm = 0.6203, lr_0 = 1.2400e-04
Loss = 7.7070e-02, PNorm = 85.0272, GNorm = 0.7355, lr_0 = 1.2392e-04
Loss = 8.0553e-02, PNorm = 85.0280, GNorm = 0.5629, lr_0 = 1.2383e-04
Loss = 7.0583e-02, PNorm = 85.0283, GNorm = 0.6475, lr_0 = 1.2375e-04
Loss = 8.1103e-02, PNorm = 85.0305, GNorm = 0.7038, lr_0 = 1.2366e-04
Loss = 8.4241e-02, PNorm = 85.0337, GNorm = 0.5568, lr_0 = 1.2358e-04
Loss = 7.7941e-02, PNorm = 85.0363, GNorm = 0.8484, lr_0 = 1.2349e-04
Loss = 8.3107e-02, PNorm = 85.0379, GNorm = 0.8082, lr_0 = 1.2341e-04
Loss = 7.5323e-02, PNorm = 85.0400, GNorm = 0.5364, lr_0 = 1.2332e-04
Loss = 7.3107e-02, PNorm = 85.0433, GNorm = 0.6234, lr_0 = 1.2324e-04
Loss = 8.6088e-02, PNorm = 85.0446, GNorm = 0.5669, lr_0 = 1.2315e-04
Loss = 7.2478e-02, PNorm = 85.0458, GNorm = 0.5611, lr_0 = 1.2307e-04
Loss = 7.1831e-02, PNorm = 85.0471, GNorm = 1.1216, lr_0 = 1.2298e-04
Loss = 7.7006e-02, PNorm = 85.0481, GNorm = 0.5602, lr_0 = 1.2290e-04
Loss = 7.6951e-02, PNorm = 85.0513, GNorm = 0.9316, lr_0 = 1.2282e-04
Loss = 7.2252e-02, PNorm = 85.0545, GNorm = 0.5747, lr_0 = 1.2273e-04
Loss = 8.4482e-02, PNorm = 85.0559, GNorm = 0.7103, lr_0 = 1.2265e-04
Loss = 8.0678e-02, PNorm = 85.0549, GNorm = 0.6411, lr_0 = 1.2256e-04
Loss = 6.9880e-02, PNorm = 85.0570, GNorm = 0.6130, lr_0 = 1.2248e-04
Loss = 7.5965e-02, PNorm = 85.0596, GNorm = 0.5562, lr_0 = 1.2240e-04
Loss = 8.3307e-02, PNorm = 85.0613, GNorm = 0.6102, lr_0 = 1.2231e-04
Loss = 7.6337e-02, PNorm = 85.0621, GNorm = 0.6202, lr_0 = 1.2223e-04
Loss = 7.7704e-02, PNorm = 85.0643, GNorm = 0.7302, lr_0 = 1.2214e-04
Loss = 8.1224e-02, PNorm = 85.0662, GNorm = 0.6252, lr_0 = 1.2206e-04
Loss = 6.7274e-02, PNorm = 85.0696, GNorm = 0.4968, lr_0 = 1.2198e-04
Loss = 9.1362e-02, PNorm = 85.0706, GNorm = 0.7357, lr_0 = 1.2189e-04
Loss = 9.9975e-02, PNorm = 85.0717, GNorm = 0.6126, lr_0 = 1.2181e-04
Loss = 7.9448e-02, PNorm = 85.0755, GNorm = 0.6116, lr_0 = 1.2173e-04
Loss = 7.4574e-02, PNorm = 85.0776, GNorm = 0.5821, lr_0 = 1.2164e-04
Loss = 6.8745e-02, PNorm = 85.0803, GNorm = 0.5451, lr_0 = 1.2156e-04
Loss = 7.4424e-02, PNorm = 85.0823, GNorm = 0.5259, lr_0 = 1.2148e-04
Loss = 8.1897e-02, PNorm = 85.0845, GNorm = 0.6713, lr_0 = 1.2139e-04
Loss = 8.4214e-02, PNorm = 85.0848, GNorm = 0.6403, lr_0 = 1.2131e-04
Loss = 8.5536e-02, PNorm = 85.0863, GNorm = 0.6453, lr_0 = 1.2123e-04
Loss = 7.7766e-02, PNorm = 85.0863, GNorm = 0.5470, lr_0 = 1.2114e-04
Loss = 8.0215e-02, PNorm = 85.0873, GNorm = 0.9887, lr_0 = 1.2106e-04
Loss = 7.5463e-02, PNorm = 85.0891, GNorm = 0.7944, lr_0 = 1.2098e-04
Loss = 8.7249e-02, PNorm = 85.0894, GNorm = 0.7021, lr_0 = 1.2090e-04
Loss = 6.7596e-02, PNorm = 85.0898, GNorm = 0.6475, lr_0 = 1.2081e-04
Loss = 7.1991e-02, PNorm = 85.0921, GNorm = 0.6044, lr_0 = 1.2073e-04
Loss = 8.4037e-02, PNorm = 85.0936, GNorm = 0.7635, lr_0 = 1.2065e-04
Loss = 8.6962e-02, PNorm = 85.0939, GNorm = 0.6683, lr_0 = 1.2056e-04
Loss = 6.6419e-02, PNorm = 85.0963, GNorm = 0.4821, lr_0 = 1.2048e-04
Loss = 8.0064e-02, PNorm = 85.0973, GNorm = 0.6171, lr_0 = 1.2040e-04
Loss = 9.0637e-02, PNorm = 85.0989, GNorm = 0.5460, lr_0 = 1.2032e-04
Loss = 7.4726e-02, PNorm = 85.1009, GNorm = 0.6241, lr_0 = 1.2023e-04
Loss = 7.6576e-02, PNorm = 85.1015, GNorm = 1.0378, lr_0 = 1.2015e-04
Loss = 7.6896e-02, PNorm = 85.1035, GNorm = 0.5747, lr_0 = 1.2007e-04
Loss = 7.2274e-02, PNorm = 85.1055, GNorm = 0.7000, lr_0 = 1.1999e-04
Loss = 7.2794e-02, PNorm = 85.1073, GNorm = 0.4799, lr_0 = 1.1991e-04
Loss = 7.3274e-02, PNorm = 85.1087, GNorm = 0.7432, lr_0 = 1.1982e-04
Loss = 7.5735e-02, PNorm = 85.1102, GNorm = 0.5317, lr_0 = 1.1974e-04
Loss = 8.5530e-02, PNorm = 85.1129, GNorm = 0.8537, lr_0 = 1.1966e-04
Loss = 7.4850e-02, PNorm = 85.1131, GNorm = 0.6822, lr_0 = 1.1958e-04
Loss = 8.5386e-02, PNorm = 85.1148, GNorm = 0.6780, lr_0 = 1.1950e-04
Loss = 8.6991e-02, PNorm = 85.1165, GNorm = 0.6790, lr_0 = 1.1941e-04
Loss = 9.3062e-02, PNorm = 85.1180, GNorm = 0.8654, lr_0 = 1.1933e-04
Loss = 8.0011e-02, PNorm = 85.1224, GNorm = 0.6778, lr_0 = 1.1925e-04
Loss = 7.5361e-02, PNorm = 85.1252, GNorm = 0.7626, lr_0 = 1.1917e-04
Loss = 7.9247e-02, PNorm = 85.1277, GNorm = 0.7352, lr_0 = 1.1909e-04
Loss = 7.7808e-02, PNorm = 85.1301, GNorm = 0.7655, lr_0 = 1.1901e-04
Loss = 9.2348e-02, PNorm = 85.1303, GNorm = 0.7338, lr_0 = 1.1892e-04
Loss = 7.5480e-02, PNorm = 85.1320, GNorm = 0.5931, lr_0 = 1.1884e-04
Loss = 8.6312e-02, PNorm = 85.1360, GNorm = 0.3996, lr_0 = 1.1876e-04
Loss = 7.5499e-02, PNorm = 85.1387, GNorm = 0.6239, lr_0 = 1.1868e-04
Loss = 7.9621e-02, PNorm = 85.1399, GNorm = 1.0474, lr_0 = 1.1860e-04
Loss = 7.5286e-02, PNorm = 85.1408, GNorm = 0.6451, lr_0 = 1.1852e-04
Loss = 8.0298e-02, PNorm = 85.1421, GNorm = 0.8026, lr_0 = 1.1844e-04
Loss = 7.0203e-02, PNorm = 85.1444, GNorm = 0.5046, lr_0 = 1.1835e-04
Loss = 7.6072e-02, PNorm = 85.1453, GNorm = 0.6106, lr_0 = 1.1827e-04
Loss = 8.3256e-02, PNorm = 85.1473, GNorm = 0.6094, lr_0 = 1.1819e-04
Loss = 7.6529e-02, PNorm = 85.1493, GNorm = 0.5547, lr_0 = 1.1811e-04
Loss = 7.1573e-02, PNorm = 85.1507, GNorm = 0.5128, lr_0 = 1.1803e-04
Loss = 8.4613e-02, PNorm = 85.1531, GNorm = 0.7226, lr_0 = 1.1795e-04
Loss = 8.4263e-02, PNorm = 85.1548, GNorm = 0.8851, lr_0 = 1.1787e-04
Validation mae = 0.225583
Epoch 28
Loss = 7.0716e-02, PNorm = 85.1582, GNorm = 0.8670, lr_0 = 1.1779e-04
Loss = 7.2157e-02, PNorm = 85.1594, GNorm = 0.5439, lr_0 = 1.1771e-04
Loss = 8.2849e-02, PNorm = 85.1597, GNorm = 0.7503, lr_0 = 1.1763e-04
Loss = 8.1500e-02, PNorm = 85.1612, GNorm = 0.6882, lr_0 = 1.1755e-04
Loss = 7.0462e-02, PNorm = 85.1612, GNorm = 0.5948, lr_0 = 1.1747e-04
Loss = 7.2355e-02, PNorm = 85.1615, GNorm = 0.4694, lr_0 = 1.1739e-04
Loss = 8.7661e-02, PNorm = 85.1635, GNorm = 0.5764, lr_0 = 1.1730e-04
Loss = 7.6425e-02, PNorm = 85.1646, GNorm = 0.5401, lr_0 = 1.1722e-04
Loss = 6.5619e-02, PNorm = 85.1668, GNorm = 0.6022, lr_0 = 1.1714e-04
Loss = 6.5211e-02, PNorm = 85.1686, GNorm = 0.5259, lr_0 = 1.1706e-04
Loss = 7.6746e-02, PNorm = 85.1710, GNorm = 0.5522, lr_0 = 1.1698e-04
Loss = 7.0641e-02, PNorm = 85.1739, GNorm = 0.8879, lr_0 = 1.1690e-04
Loss = 7.5376e-02, PNorm = 85.1754, GNorm = 0.7347, lr_0 = 1.1682e-04
Loss = 7.7177e-02, PNorm = 85.1749, GNorm = 0.5969, lr_0 = 1.1674e-04
Loss = 8.2766e-02, PNorm = 85.1758, GNorm = 0.6608, lr_0 = 1.1666e-04
Loss = 7.4825e-02, PNorm = 85.1773, GNorm = 0.5501, lr_0 = 1.1658e-04
Loss = 7.5679e-02, PNorm = 85.1773, GNorm = 0.6314, lr_0 = 1.1650e-04
Loss = 8.2283e-02, PNorm = 85.1778, GNorm = 0.5634, lr_0 = 1.1642e-04
Loss = 8.1068e-02, PNorm = 85.1809, GNorm = 0.6852, lr_0 = 1.1634e-04
Loss = 7.3991e-02, PNorm = 85.1843, GNorm = 0.6365, lr_0 = 1.1626e-04
Loss = 7.3817e-02, PNorm = 85.1858, GNorm = 0.9078, lr_0 = 1.1618e-04
Loss = 8.1932e-02, PNorm = 85.1863, GNorm = 0.6565, lr_0 = 1.1611e-04
Loss = 6.8254e-02, PNorm = 85.1882, GNorm = 0.6506, lr_0 = 1.1603e-04
Loss = 7.5586e-02, PNorm = 85.1897, GNorm = 0.5321, lr_0 = 1.1595e-04
Loss = 8.3320e-02, PNorm = 85.1915, GNorm = 0.5287, lr_0 = 1.1587e-04
Loss = 7.0295e-02, PNorm = 85.1935, GNorm = 0.4456, lr_0 = 1.1579e-04
Loss = 7.9824e-02, PNorm = 85.1945, GNorm = 0.7811, lr_0 = 1.1571e-04
Loss = 7.2146e-02, PNorm = 85.1975, GNorm = 0.4794, lr_0 = 1.1563e-04
Loss = 6.5475e-02, PNorm = 85.1994, GNorm = 0.4986, lr_0 = 1.1555e-04
Loss = 7.1057e-02, PNorm = 85.2011, GNorm = 0.7929, lr_0 = 1.1547e-04
Loss = 7.9512e-02, PNorm = 85.2033, GNorm = 0.6899, lr_0 = 1.1539e-04
Loss = 7.5934e-02, PNorm = 85.2065, GNorm = 0.6313, lr_0 = 1.1531e-04
Loss = 6.7012e-02, PNorm = 85.2088, GNorm = 0.5932, lr_0 = 1.1523e-04
Loss = 7.5169e-02, PNorm = 85.2111, GNorm = 0.5112, lr_0 = 1.1515e-04
Loss = 6.6654e-02, PNorm = 85.2122, GNorm = 0.5447, lr_0 = 1.1508e-04
Loss = 6.8934e-02, PNorm = 85.2145, GNorm = 0.6913, lr_0 = 1.1500e-04
Loss = 8.0414e-02, PNorm = 85.2154, GNorm = 0.6220, lr_0 = 1.1492e-04
Loss = 8.5281e-02, PNorm = 85.2157, GNorm = 0.7349, lr_0 = 1.1484e-04
Loss = 8.6010e-02, PNorm = 85.2170, GNorm = 0.5395, lr_0 = 1.1476e-04
Loss = 8.4264e-02, PNorm = 85.2180, GNorm = 0.6199, lr_0 = 1.1468e-04
Loss = 7.4848e-02, PNorm = 85.2194, GNorm = 0.7475, lr_0 = 1.1460e-04
Loss = 8.1814e-02, PNorm = 85.2175, GNorm = 0.6253, lr_0 = 1.1452e-04
Loss = 7.8593e-02, PNorm = 85.2184, GNorm = 0.8074, lr_0 = 1.1445e-04
Loss = 9.5847e-02, PNorm = 85.2207, GNorm = 0.7456, lr_0 = 1.1437e-04
Loss = 7.2991e-02, PNorm = 85.2226, GNorm = 0.5367, lr_0 = 1.1429e-04
Loss = 8.8784e-02, PNorm = 85.2251, GNorm = 0.6026, lr_0 = 1.1421e-04
Loss = 7.0733e-02, PNorm = 85.2281, GNorm = 0.6429, lr_0 = 1.1413e-04
Loss = 7.9959e-02, PNorm = 85.2293, GNorm = 0.7595, lr_0 = 1.1405e-04
Loss = 8.0107e-02, PNorm = 85.2312, GNorm = 1.2051, lr_0 = 1.1398e-04
Loss = 7.6573e-02, PNorm = 85.2334, GNorm = 0.5370, lr_0 = 1.1390e-04
Loss = 8.0142e-02, PNorm = 85.2338, GNorm = 0.6574, lr_0 = 1.1382e-04
Loss = 7.5734e-02, PNorm = 85.2359, GNorm = 0.6962, lr_0 = 1.1374e-04
Loss = 8.3218e-02, PNorm = 85.2368, GNorm = 0.6554, lr_0 = 1.1366e-04
Loss = 7.4711e-02, PNorm = 85.2375, GNorm = 0.6966, lr_0 = 1.1359e-04
Loss = 8.5449e-02, PNorm = 85.2391, GNorm = 0.6852, lr_0 = 1.1351e-04
Loss = 6.7904e-02, PNorm = 85.2414, GNorm = 0.4293, lr_0 = 1.1343e-04
Loss = 7.4923e-02, PNorm = 85.2425, GNorm = 0.5848, lr_0 = 1.1335e-04
Loss = 7.8016e-02, PNorm = 85.2447, GNorm = 0.5163, lr_0 = 1.1328e-04
Loss = 7.2682e-02, PNorm = 85.2461, GNorm = 0.7047, lr_0 = 1.1320e-04
Loss = 7.3741e-02, PNorm = 85.2475, GNorm = 0.5182, lr_0 = 1.1312e-04
Loss = 8.1578e-02, PNorm = 85.2501, GNorm = 0.7713, lr_0 = 1.1304e-04
Loss = 8.0533e-02, PNorm = 85.2512, GNorm = 0.7794, lr_0 = 1.1297e-04
Loss = 8.5302e-02, PNorm = 85.2527, GNorm = 0.7043, lr_0 = 1.1289e-04
Loss = 7.7210e-02, PNorm = 85.2541, GNorm = 0.6296, lr_0 = 1.1281e-04
Loss = 7.6105e-02, PNorm = 85.2567, GNorm = 0.5801, lr_0 = 1.1273e-04
Loss = 8.0552e-02, PNorm = 85.2589, GNorm = 0.6935, lr_0 = 1.1266e-04
Loss = 7.1415e-02, PNorm = 85.2608, GNorm = 0.5481, lr_0 = 1.1258e-04
Loss = 7.6883e-02, PNorm = 85.2615, GNorm = 0.6486, lr_0 = 1.1250e-04
Loss = 6.8245e-02, PNorm = 85.2632, GNorm = 0.6056, lr_0 = 1.1243e-04
Loss = 6.6507e-02, PNorm = 85.2644, GNorm = 0.6799, lr_0 = 1.1235e-04
Loss = 7.2834e-02, PNorm = 85.2657, GNorm = 0.5455, lr_0 = 1.1227e-04
Loss = 6.5712e-02, PNorm = 85.2675, GNorm = 0.5714, lr_0 = 1.1219e-04
Loss = 7.6390e-02, PNorm = 85.2695, GNorm = 0.5319, lr_0 = 1.1212e-04
Loss = 8.4426e-02, PNorm = 85.2695, GNorm = 0.5779, lr_0 = 1.1204e-04
Loss = 8.7789e-02, PNorm = 85.2699, GNorm = 0.7383, lr_0 = 1.1196e-04
Loss = 8.5333e-02, PNorm = 85.2731, GNorm = 0.5635, lr_0 = 1.1189e-04
Loss = 8.3397e-02, PNorm = 85.2757, GNorm = 0.5340, lr_0 = 1.1181e-04
Loss = 7.6394e-02, PNorm = 85.2777, GNorm = 0.6339, lr_0 = 1.1173e-04
Loss = 7.7733e-02, PNorm = 85.2800, GNorm = 0.6555, lr_0 = 1.1166e-04
Loss = 6.4225e-02, PNorm = 85.2798, GNorm = 0.5432, lr_0 = 1.1158e-04
Loss = 7.1395e-02, PNorm = 85.2789, GNorm = 0.5181, lr_0 = 1.1150e-04
Loss = 8.1251e-02, PNorm = 85.2809, GNorm = 0.6871, lr_0 = 1.1143e-04
Loss = 6.9978e-02, PNorm = 85.2821, GNorm = 0.5967, lr_0 = 1.1135e-04
Loss = 8.1654e-02, PNorm = 85.2836, GNorm = 0.7039, lr_0 = 1.1128e-04
Loss = 8.3574e-02, PNorm = 85.2857, GNorm = 0.6365, lr_0 = 1.1120e-04
Loss = 7.6690e-02, PNorm = 85.2858, GNorm = 0.6242, lr_0 = 1.1112e-04
Loss = 7.7713e-02, PNorm = 85.2873, GNorm = 0.5205, lr_0 = 1.1105e-04
Loss = 8.1569e-02, PNorm = 85.2901, GNorm = 0.5717, lr_0 = 1.1097e-04
Loss = 8.0290e-02, PNorm = 85.2913, GNorm = 0.9711, lr_0 = 1.1089e-04
Loss = 7.7140e-02, PNorm = 85.2930, GNorm = 0.5727, lr_0 = 1.1082e-04
Loss = 8.0210e-02, PNorm = 85.2963, GNorm = 0.6476, lr_0 = 1.1074e-04
Loss = 8.0856e-02, PNorm = 85.2979, GNorm = 0.5352, lr_0 = 1.1067e-04
Loss = 7.8774e-02, PNorm = 85.2999, GNorm = 0.6108, lr_0 = 1.1059e-04
Loss = 7.0211e-02, PNorm = 85.3022, GNorm = 0.6066, lr_0 = 1.1052e-04
Loss = 6.8069e-02, PNorm = 85.3036, GNorm = 0.4428, lr_0 = 1.1044e-04
Loss = 8.1594e-02, PNorm = 85.3056, GNorm = 0.7472, lr_0 = 1.1036e-04
Loss = 8.1312e-02, PNorm = 85.3074, GNorm = 0.6905, lr_0 = 1.1029e-04
Loss = 8.6288e-02, PNorm = 85.3094, GNorm = 0.8520, lr_0 = 1.1021e-04
Loss = 7.7934e-02, PNorm = 85.3120, GNorm = 0.7360, lr_0 = 1.1014e-04
Loss = 7.7095e-02, PNorm = 85.3147, GNorm = 0.6543, lr_0 = 1.1006e-04
Loss = 8.1213e-02, PNorm = 85.3160, GNorm = 0.6133, lr_0 = 1.0999e-04
Loss = 7.3141e-02, PNorm = 85.3175, GNorm = 0.5462, lr_0 = 1.0991e-04
Loss = 7.7654e-02, PNorm = 85.3179, GNorm = 0.8111, lr_0 = 1.0984e-04
Loss = 7.5398e-02, PNorm = 85.3191, GNorm = 0.7462, lr_0 = 1.0976e-04
Loss = 8.1613e-02, PNorm = 85.3207, GNorm = 0.8238, lr_0 = 1.0969e-04
Loss = 7.5462e-02, PNorm = 85.3207, GNorm = 0.5976, lr_0 = 1.0961e-04
Loss = 7.5440e-02, PNorm = 85.3213, GNorm = 0.6810, lr_0 = 1.0954e-04
Loss = 7.5258e-02, PNorm = 85.3236, GNorm = 0.7120, lr_0 = 1.0946e-04
Loss = 8.4320e-02, PNorm = 85.3245, GNorm = 0.8437, lr_0 = 1.0939e-04
Loss = 7.8819e-02, PNorm = 85.3250, GNorm = 0.7067, lr_0 = 1.0931e-04
Loss = 7.9616e-02, PNorm = 85.3276, GNorm = 0.7374, lr_0 = 1.0924e-04
Loss = 7.7698e-02, PNorm = 85.3304, GNorm = 0.7633, lr_0 = 1.0916e-04
Loss = 8.1701e-02, PNorm = 85.3327, GNorm = 0.6506, lr_0 = 1.0909e-04
Loss = 7.7569e-02, PNorm = 85.3355, GNorm = 0.6746, lr_0 = 1.0901e-04
Loss = 6.9746e-02, PNorm = 85.3374, GNorm = 0.5389, lr_0 = 1.0894e-04
Loss = 7.2900e-02, PNorm = 85.3404, GNorm = 0.6292, lr_0 = 1.0886e-04
Loss = 7.5287e-02, PNorm = 85.3415, GNorm = 0.6050, lr_0 = 1.0879e-04
Loss = 7.6786e-02, PNorm = 85.3427, GNorm = 0.5575, lr_0 = 1.0871e-04
Loss = 8.0736e-02, PNorm = 85.3451, GNorm = 0.6649, lr_0 = 1.0864e-04
Loss = 7.8546e-02, PNorm = 85.3468, GNorm = 0.6651, lr_0 = 1.0856e-04
Validation mae = 0.227231
Epoch 29
Loss = 7.2574e-02, PNorm = 85.3480, GNorm = 0.5249, lr_0 = 1.0849e-04
Loss = 7.8572e-02, PNorm = 85.3492, GNorm = 0.4842, lr_0 = 1.0841e-04
Loss = 7.1586e-02, PNorm = 85.3497, GNorm = 0.7310, lr_0 = 1.0834e-04
Loss = 6.2965e-02, PNorm = 85.3517, GNorm = 0.5588, lr_0 = 1.0827e-04
Loss = 8.2059e-02, PNorm = 85.3533, GNorm = 0.7873, lr_0 = 1.0819e-04
Loss = 7.2290e-02, PNorm = 85.3549, GNorm = 0.6602, lr_0 = 1.0812e-04
Loss = 8.2922e-02, PNorm = 85.3569, GNorm = 0.5976, lr_0 = 1.0804e-04
Loss = 6.7017e-02, PNorm = 85.3597, GNorm = 0.7151, lr_0 = 1.0797e-04
Loss = 7.3569e-02, PNorm = 85.3631, GNorm = 0.5030, lr_0 = 1.0790e-04
Loss = 6.5888e-02, PNorm = 85.3656, GNorm = 0.6473, lr_0 = 1.0782e-04
Loss = 7.3201e-02, PNorm = 85.3671, GNorm = 0.7061, lr_0 = 1.0775e-04
Loss = 7.0548e-02, PNorm = 85.3692, GNorm = 0.5367, lr_0 = 1.0767e-04
Loss = 7.2625e-02, PNorm = 85.3720, GNorm = 0.5972, lr_0 = 1.0760e-04
Loss = 7.9335e-02, PNorm = 85.3747, GNorm = 0.6175, lr_0 = 1.0753e-04
Loss = 7.1103e-02, PNorm = 85.3764, GNorm = 0.5411, lr_0 = 1.0745e-04
Loss = 7.6770e-02, PNorm = 85.3778, GNorm = 0.5820, lr_0 = 1.0738e-04
Loss = 8.3101e-02, PNorm = 85.3781, GNorm = 0.7641, lr_0 = 1.0731e-04
Loss = 8.0269e-02, PNorm = 85.3802, GNorm = 0.7713, lr_0 = 1.0723e-04
Loss = 7.5718e-02, PNorm = 85.3830, GNorm = 0.5814, lr_0 = 1.0716e-04
Loss = 6.6639e-02, PNorm = 85.3849, GNorm = 0.6818, lr_0 = 1.0709e-04
Loss = 7.2576e-02, PNorm = 85.3862, GNorm = 0.5459, lr_0 = 1.0701e-04
Loss = 8.2421e-02, PNorm = 85.3876, GNorm = 0.5824, lr_0 = 1.0694e-04
Loss = 8.1868e-02, PNorm = 85.3892, GNorm = 0.6538, lr_0 = 1.0687e-04
Loss = 6.6099e-02, PNorm = 85.3905, GNorm = 0.5061, lr_0 = 1.0679e-04
Loss = 8.4012e-02, PNorm = 85.3915, GNorm = 0.5746, lr_0 = 1.0672e-04
Loss = 6.9700e-02, PNorm = 85.3935, GNorm = 0.5176, lr_0 = 1.0665e-04
Loss = 8.1904e-02, PNorm = 85.3953, GNorm = 0.6068, lr_0 = 1.0657e-04
Loss = 7.5013e-02, PNorm = 85.3965, GNorm = 0.5334, lr_0 = 1.0650e-04
Loss = 8.2699e-02, PNorm = 85.3969, GNorm = 0.6232, lr_0 = 1.0643e-04
Loss = 7.7609e-02, PNorm = 85.3978, GNorm = 0.6677, lr_0 = 1.0635e-04
Loss = 6.6804e-02, PNorm = 85.3985, GNorm = 0.5477, lr_0 = 1.0628e-04
Loss = 6.9617e-02, PNorm = 85.4005, GNorm = 0.9706, lr_0 = 1.0621e-04
Loss = 7.8019e-02, PNorm = 85.4013, GNorm = 0.7073, lr_0 = 1.0614e-04
Loss = 7.8312e-02, PNorm = 85.4016, GNorm = 0.6742, lr_0 = 1.0606e-04
Loss = 8.4977e-02, PNorm = 85.4032, GNorm = 0.9021, lr_0 = 1.0599e-04
Loss = 7.7770e-02, PNorm = 85.4050, GNorm = 0.8093, lr_0 = 1.0592e-04
Loss = 7.5663e-02, PNorm = 85.4058, GNorm = 0.6740, lr_0 = 1.0585e-04
Loss = 7.9009e-02, PNorm = 85.4067, GNorm = 0.7687, lr_0 = 1.0577e-04
Loss = 8.0920e-02, PNorm = 85.4076, GNorm = 0.5206, lr_0 = 1.0570e-04
Loss = 7.4914e-02, PNorm = 85.4087, GNorm = 0.8813, lr_0 = 1.0563e-04
Loss = 8.9410e-02, PNorm = 85.4111, GNorm = 0.8181, lr_0 = 1.0556e-04
Loss = 7.0894e-02, PNorm = 85.4127, GNorm = 0.7279, lr_0 = 1.0548e-04
Loss = 7.5672e-02, PNorm = 85.4134, GNorm = 0.5986, lr_0 = 1.0541e-04
Loss = 8.3369e-02, PNorm = 85.4131, GNorm = 0.6074, lr_0 = 1.0534e-04
Loss = 7.0051e-02, PNorm = 85.4139, GNorm = 0.7268, lr_0 = 1.0527e-04
Loss = 6.9299e-02, PNorm = 85.4162, GNorm = 0.5762, lr_0 = 1.0519e-04
Loss = 7.8006e-02, PNorm = 85.4184, GNorm = 0.5677, lr_0 = 1.0512e-04
Loss = 7.9159e-02, PNorm = 85.4190, GNorm = 0.6273, lr_0 = 1.0505e-04
Loss = 6.4342e-02, PNorm = 85.4212, GNorm = 0.4758, lr_0 = 1.0498e-04
Loss = 7.1518e-02, PNorm = 85.4218, GNorm = 0.6430, lr_0 = 1.0491e-04
Loss = 7.6179e-02, PNorm = 85.4225, GNorm = 0.5322, lr_0 = 1.0483e-04
Loss = 7.4696e-02, PNorm = 85.4247, GNorm = 0.5318, lr_0 = 1.0476e-04
Loss = 5.8659e-02, PNorm = 85.4257, GNorm = 0.4393, lr_0 = 1.0469e-04
Loss = 7.5955e-02, PNorm = 85.4275, GNorm = 0.7368, lr_0 = 1.0462e-04
Loss = 6.9798e-02, PNorm = 85.4295, GNorm = 0.6123, lr_0 = 1.0455e-04
Loss = 6.7197e-02, PNorm = 85.4316, GNorm = 0.5403, lr_0 = 1.0448e-04
Loss = 8.1804e-02, PNorm = 85.4328, GNorm = 0.4996, lr_0 = 1.0440e-04
Loss = 8.3030e-02, PNorm = 85.4347, GNorm = 0.5626, lr_0 = 1.0433e-04
Loss = 7.2913e-02, PNorm = 85.4365, GNorm = 0.8449, lr_0 = 1.0426e-04
Loss = 7.4962e-02, PNorm = 85.4362, GNorm = 0.5407, lr_0 = 1.0419e-04
Loss = 7.0128e-02, PNorm = 85.4374, GNorm = 0.5729, lr_0 = 1.0412e-04
Loss = 7.0790e-02, PNorm = 85.4388, GNorm = 0.5737, lr_0 = 1.0405e-04
Loss = 6.7859e-02, PNorm = 85.4402, GNorm = 0.5256, lr_0 = 1.0398e-04
Loss = 7.2106e-02, PNorm = 85.4415, GNorm = 0.6014, lr_0 = 1.0391e-04
Loss = 7.5993e-02, PNorm = 85.4422, GNorm = 0.6994, lr_0 = 1.0383e-04
Loss = 7.4037e-02, PNorm = 85.4435, GNorm = 0.9573, lr_0 = 1.0376e-04
Loss = 8.5790e-02, PNorm = 85.4438, GNorm = 0.7330, lr_0 = 1.0369e-04
Loss = 7.5850e-02, PNorm = 85.4441, GNorm = 0.6348, lr_0 = 1.0362e-04
Loss = 6.6750e-02, PNorm = 85.4446, GNorm = 0.6854, lr_0 = 1.0355e-04
Loss = 7.0587e-02, PNorm = 85.4473, GNorm = 0.6470, lr_0 = 1.0348e-04
Loss = 7.6341e-02, PNorm = 85.4490, GNorm = 0.7770, lr_0 = 1.0341e-04
Loss = 7.7362e-02, PNorm = 85.4491, GNorm = 0.7716, lr_0 = 1.0334e-04
Loss = 7.6572e-02, PNorm = 85.4497, GNorm = 0.5974, lr_0 = 1.0327e-04
Loss = 8.0681e-02, PNorm = 85.4525, GNorm = 0.5939, lr_0 = 1.0320e-04
Loss = 8.2015e-02, PNorm = 85.4548, GNorm = 0.8745, lr_0 = 1.0312e-04
Loss = 6.7138e-02, PNorm = 85.4567, GNorm = 0.5212, lr_0 = 1.0305e-04
Loss = 8.0179e-02, PNorm = 85.4579, GNorm = 0.8447, lr_0 = 1.0298e-04
Loss = 8.3577e-02, PNorm = 85.4585, GNorm = 0.6708, lr_0 = 1.0291e-04
Loss = 8.2321e-02, PNorm = 85.4597, GNorm = 0.8019, lr_0 = 1.0284e-04
Loss = 6.6276e-02, PNorm = 85.4611, GNorm = 0.5750, lr_0 = 1.0277e-04
Loss = 7.2889e-02, PNorm = 85.4620, GNorm = 0.5465, lr_0 = 1.0270e-04
Loss = 7.2442e-02, PNorm = 85.4630, GNorm = 0.4958, lr_0 = 1.0263e-04
Loss = 7.6683e-02, PNorm = 85.4652, GNorm = 0.4971, lr_0 = 1.0256e-04
Loss = 6.7071e-02, PNorm = 85.4665, GNorm = 0.5022, lr_0 = 1.0249e-04
Loss = 7.1844e-02, PNorm = 85.4667, GNorm = 0.6880, lr_0 = 1.0242e-04
Loss = 7.1848e-02, PNorm = 85.4668, GNorm = 0.5854, lr_0 = 1.0235e-04
Loss = 7.3885e-02, PNorm = 85.4671, GNorm = 0.7276, lr_0 = 1.0228e-04
Loss = 8.1570e-02, PNorm = 85.4676, GNorm = 0.7298, lr_0 = 1.0221e-04
Loss = 8.8500e-02, PNorm = 85.4688, GNorm = 0.6248, lr_0 = 1.0214e-04
Loss = 7.8429e-02, PNorm = 85.4700, GNorm = 0.8062, lr_0 = 1.0207e-04
Loss = 7.6068e-02, PNorm = 85.4715, GNorm = 0.5781, lr_0 = 1.0200e-04
Loss = 7.5760e-02, PNorm = 85.4727, GNorm = 0.7419, lr_0 = 1.0193e-04
Loss = 8.0404e-02, PNorm = 85.4744, GNorm = 0.5967, lr_0 = 1.0186e-04
Loss = 8.9627e-02, PNorm = 85.4777, GNorm = 0.6620, lr_0 = 1.0179e-04
Loss = 7.4188e-02, PNorm = 85.4804, GNorm = 0.7095, lr_0 = 1.0172e-04
Loss = 7.6782e-02, PNorm = 85.4818, GNorm = 0.5306, lr_0 = 1.0165e-04
Loss = 7.2028e-02, PNorm = 85.4823, GNorm = 0.6299, lr_0 = 1.0158e-04
Loss = 7.3567e-02, PNorm = 85.4842, GNorm = 0.5432, lr_0 = 1.0151e-04
Loss = 7.8740e-02, PNorm = 85.4865, GNorm = 0.7767, lr_0 = 1.0144e-04
Loss = 6.9386e-02, PNorm = 85.4896, GNorm = 0.6607, lr_0 = 1.0137e-04
Loss = 7.0994e-02, PNorm = 85.4906, GNorm = 0.5579, lr_0 = 1.0130e-04
Loss = 7.2993e-02, PNorm = 85.4912, GNorm = 0.4777, lr_0 = 1.0123e-04
Loss = 8.2264e-02, PNorm = 85.4924, GNorm = 0.7657, lr_0 = 1.0116e-04
Loss = 7.7881e-02, PNorm = 85.4941, GNorm = 0.8257, lr_0 = 1.0110e-04
Loss = 6.7288e-02, PNorm = 85.4961, GNorm = 0.4399, lr_0 = 1.0103e-04
Loss = 7.1174e-02, PNorm = 85.4983, GNorm = 0.6727, lr_0 = 1.0096e-04
Loss = 8.0815e-02, PNorm = 85.5005, GNorm = 0.8160, lr_0 = 1.0089e-04
Loss = 8.3129e-02, PNorm = 85.5033, GNorm = 0.6711, lr_0 = 1.0082e-04
Loss = 7.7772e-02, PNorm = 85.5057, GNorm = 0.7134, lr_0 = 1.0075e-04
Loss = 7.3498e-02, PNorm = 85.5066, GNorm = 0.8090, lr_0 = 1.0068e-04
Loss = 7.3388e-02, PNorm = 85.5089, GNorm = 0.5780, lr_0 = 1.0061e-04
Loss = 7.1856e-02, PNorm = 85.5102, GNorm = 0.5714, lr_0 = 1.0054e-04
Loss = 7.5400e-02, PNorm = 85.5119, GNorm = 0.7846, lr_0 = 1.0047e-04
Loss = 7.7693e-02, PNorm = 85.5142, GNorm = 0.5552, lr_0 = 1.0041e-04
Loss = 6.9641e-02, PNorm = 85.5153, GNorm = 0.6267, lr_0 = 1.0034e-04
Loss = 7.8140e-02, PNorm = 85.5159, GNorm = 0.6688, lr_0 = 1.0027e-04
Loss = 7.0206e-02, PNorm = 85.5169, GNorm = 0.7648, lr_0 = 1.0020e-04
Loss = 7.3406e-02, PNorm = 85.5179, GNorm = 0.8063, lr_0 = 1.0013e-04
Loss = 6.9389e-02, PNorm = 85.5198, GNorm = 0.6191, lr_0 = 1.0006e-04
Loss = 7.6557e-02, PNorm = 85.5216, GNorm = 0.7498, lr_0 = 1.0000e-04
Validation mae = 0.227280
Model 0 best validation mae = 0.225583 on epoch 27
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.225760
Ensemble test mae = 0.225760
Fold 4
Splitting data with seed 4
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 9.7667e-01, PNorm = 47.8616, GNorm = 2.8040, lr_0 = 1.0413e-04
Loss = 9.7351e-01, PNorm = 47.8627, GNorm = 4.4168, lr_0 = 1.0788e-04
Loss = 8.8650e-01, PNorm = 47.8642, GNorm = 2.9050, lr_0 = 1.1163e-04
Loss = 9.4137e-01, PNorm = 47.8657, GNorm = 4.7458, lr_0 = 1.1537e-04
Loss = 8.9472e-01, PNorm = 47.8684, GNorm = 2.4849, lr_0 = 1.1913e-04
Loss = 8.4028e-01, PNorm = 47.8720, GNorm = 3.7651, lr_0 = 1.2287e-04
Loss = 7.9295e-01, PNorm = 47.8773, GNorm = 2.1937, lr_0 = 1.2663e-04
Loss = 7.2711e-01, PNorm = 47.8843, GNorm = 5.2400, lr_0 = 1.3038e-04
Loss = 6.8795e-01, PNorm = 47.8906, GNorm = 6.1146, lr_0 = 1.3413e-04
Loss = 6.3576e-01, PNorm = 47.8969, GNorm = 3.5847, lr_0 = 1.3788e-04
Loss = 6.5115e-01, PNorm = 47.9035, GNorm = 14.3097, lr_0 = 1.4163e-04
Loss = 6.3196e-01, PNorm = 47.9074, GNorm = 9.8294, lr_0 = 1.4537e-04
Loss = 6.0524e-01, PNorm = 47.9135, GNorm = 6.2088, lr_0 = 1.4913e-04
Loss = 4.9187e-01, PNorm = 47.9200, GNorm = 13.4304, lr_0 = 1.5288e-04
Loss = 5.4052e-01, PNorm = 47.9247, GNorm = 15.9188, lr_0 = 1.5662e-04
Loss = 4.3566e-01, PNorm = 47.9293, GNorm = 3.0074, lr_0 = 1.6038e-04
Loss = 4.3524e-01, PNorm = 47.9345, GNorm = 3.8729, lr_0 = 1.6412e-04
Loss = 5.4076e-01, PNorm = 47.9386, GNorm = 3.8094, lr_0 = 1.6788e-04
Loss = 4.0870e-01, PNorm = 47.9446, GNorm = 12.4541, lr_0 = 1.7163e-04
Loss = 5.1765e-01, PNorm = 47.9475, GNorm = 11.2936, lr_0 = 1.7538e-04
Loss = 4.9468e-01, PNorm = 47.9516, GNorm = 5.4190, lr_0 = 1.7913e-04
Loss = 4.4282e-01, PNorm = 47.9583, GNorm = 19.6432, lr_0 = 1.8288e-04
Loss = 4.4677e-01, PNorm = 47.9631, GNorm = 3.7465, lr_0 = 1.8662e-04
Loss = 4.3937e-01, PNorm = 47.9678, GNorm = 5.2007, lr_0 = 1.9038e-04
Loss = 3.8997e-01, PNorm = 47.9744, GNorm = 1.9722, lr_0 = 1.9413e-04
Loss = 3.3752e-01, PNorm = 47.9818, GNorm = 1.8824, lr_0 = 1.9788e-04
Loss = 3.7684e-01, PNorm = 47.9864, GNorm = 4.4993, lr_0 = 2.0163e-04
Loss = 4.2799e-01, PNorm = 47.9918, GNorm = 23.4955, lr_0 = 2.0537e-04
Loss = 5.5713e-01, PNorm = 47.9948, GNorm = 13.1643, lr_0 = 2.0913e-04
Loss = 4.8878e-01, PNorm = 48.0019, GNorm = 7.4538, lr_0 = 2.1288e-04
Loss = 4.1915e-01, PNorm = 48.0104, GNorm = 10.7213, lr_0 = 2.1663e-04
Loss = 3.6910e-01, PNorm = 48.0155, GNorm = 2.4549, lr_0 = 2.2038e-04
Loss = 3.7023e-01, PNorm = 48.0220, GNorm = 3.6543, lr_0 = 2.2412e-04
Loss = 3.4069e-01, PNorm = 48.0263, GNorm = 6.7377, lr_0 = 2.2787e-04
Loss = 4.2351e-01, PNorm = 48.0337, GNorm = 1.4003, lr_0 = 2.3163e-04
Loss = 4.0720e-01, PNorm = 48.0404, GNorm = 13.4871, lr_0 = 2.3538e-04
Loss = 3.9410e-01, PNorm = 48.0472, GNorm = 12.5911, lr_0 = 2.3913e-04
Loss = 3.7770e-01, PNorm = 48.0539, GNorm = 5.8761, lr_0 = 2.4288e-04
Loss = 3.7747e-01, PNorm = 48.0600, GNorm = 2.5107, lr_0 = 2.4662e-04
Loss = 3.7431e-01, PNorm = 48.0665, GNorm = 11.7175, lr_0 = 2.5038e-04
Loss = 3.2109e-01, PNorm = 48.0743, GNorm = 10.5588, lr_0 = 2.5413e-04
Loss = 3.0053e-01, PNorm = 48.0789, GNorm = 4.4392, lr_0 = 2.5788e-04
Loss = 3.3505e-01, PNorm = 48.0822, GNorm = 4.6772, lr_0 = 2.6163e-04
Loss = 3.6832e-01, PNorm = 48.0883, GNorm = 2.4743, lr_0 = 2.6537e-04
Loss = 4.0970e-01, PNorm = 48.0975, GNorm = 11.3026, lr_0 = 2.6912e-04
Loss = 3.4493e-01, PNorm = 48.1073, GNorm = 7.3096, lr_0 = 2.7288e-04
Loss = 4.3672e-01, PNorm = 48.1142, GNorm = 2.7287, lr_0 = 2.7663e-04
Loss = 3.1810e-01, PNorm = 48.1230, GNorm = 1.6455, lr_0 = 2.8038e-04
Loss = 3.2556e-01, PNorm = 48.1293, GNorm = 3.8585, lr_0 = 2.8413e-04
Loss = 3.0280e-01, PNorm = 48.1361, GNorm = 11.9163, lr_0 = 2.8787e-04
Loss = 3.3776e-01, PNorm = 48.1421, GNorm = 11.5664, lr_0 = 2.9163e-04
Loss = 3.7855e-01, PNorm = 48.1471, GNorm = 2.2019, lr_0 = 2.9538e-04
Loss = 3.0733e-01, PNorm = 48.1542, GNorm = 2.1054, lr_0 = 2.9913e-04
Loss = 3.0260e-01, PNorm = 48.1593, GNorm = 3.1138, lr_0 = 3.0288e-04
Loss = 3.3766e-01, PNorm = 48.1614, GNorm = 10.3402, lr_0 = 3.0662e-04
Loss = 3.1719e-01, PNorm = 48.1692, GNorm = 6.5553, lr_0 = 3.1037e-04
Loss = 3.7788e-01, PNorm = 48.1733, GNorm = 4.4565, lr_0 = 3.1413e-04
Loss = 3.0109e-01, PNorm = 48.1838, GNorm = 10.0552, lr_0 = 3.1788e-04
Loss = 3.0917e-01, PNorm = 48.1907, GNorm = 3.1391, lr_0 = 3.2163e-04
Loss = 2.7352e-01, PNorm = 48.1974, GNorm = 4.0838, lr_0 = 3.2538e-04
Loss = 3.8693e-01, PNorm = 48.2005, GNorm = 18.7238, lr_0 = 3.2912e-04
Loss = 3.6820e-01, PNorm = 48.2062, GNorm = 1.9444, lr_0 = 3.3288e-04
Loss = 3.3835e-01, PNorm = 48.2172, GNorm = 4.0636, lr_0 = 3.3663e-04
Loss = 3.4311e-01, PNorm = 48.2256, GNorm = 5.5813, lr_0 = 3.4038e-04
Loss = 3.3904e-01, PNorm = 48.2352, GNorm = 17.3281, lr_0 = 3.4413e-04
Loss = 2.9568e-01, PNorm = 48.2459, GNorm = 1.4422, lr_0 = 3.4787e-04
Loss = 3.4105e-01, PNorm = 48.2542, GNorm = 6.3224, lr_0 = 3.5162e-04
Loss = 3.1777e-01, PNorm = 48.2593, GNorm = 9.1286, lr_0 = 3.5538e-04
Loss = 3.3080e-01, PNorm = 48.2668, GNorm = 2.2924, lr_0 = 3.5913e-04
Loss = 2.8361e-01, PNorm = 48.2740, GNorm = 3.5569, lr_0 = 3.6288e-04
Loss = 3.0232e-01, PNorm = 48.2817, GNorm = 5.4903, lr_0 = 3.6662e-04
Loss = 3.0478e-01, PNorm = 48.2909, GNorm = 5.7961, lr_0 = 3.7037e-04
Loss = 3.3754e-01, PNorm = 48.3011, GNorm = 6.7828, lr_0 = 3.7413e-04
Loss = 3.0108e-01, PNorm = 48.3148, GNorm = 2.3548, lr_0 = 3.7788e-04
Loss = 3.1500e-01, PNorm = 48.3228, GNorm = 4.7393, lr_0 = 3.8163e-04
Loss = 3.3381e-01, PNorm = 48.3317, GNorm = 8.0134, lr_0 = 3.8537e-04
Loss = 4.0424e-01, PNorm = 48.3402, GNorm = 13.1007, lr_0 = 3.8912e-04
Loss = 3.1158e-01, PNorm = 48.3526, GNorm = 1.5053, lr_0 = 3.9287e-04
Loss = 2.7946e-01, PNorm = 48.3655, GNorm = 10.1090, lr_0 = 3.9663e-04
Loss = 2.8235e-01, PNorm = 48.3757, GNorm = 5.0824, lr_0 = 4.0038e-04
Loss = 2.6510e-01, PNorm = 48.3831, GNorm = 5.5581, lr_0 = 4.0413e-04
Loss = 2.6625e-01, PNorm = 48.3883, GNorm = 5.7347, lr_0 = 4.0787e-04
Loss = 3.0678e-01, PNorm = 48.3934, GNorm = 4.3349, lr_0 = 4.1162e-04
Loss = 2.9490e-01, PNorm = 48.4038, GNorm = 7.1993, lr_0 = 4.1537e-04
Loss = 2.8043e-01, PNorm = 48.4154, GNorm = 1.6400, lr_0 = 4.1913e-04
Loss = 2.4620e-01, PNorm = 48.4230, GNorm = 0.9676, lr_0 = 4.2288e-04
Loss = 2.8778e-01, PNorm = 48.4311, GNorm = 5.5438, lr_0 = 4.2662e-04
Loss = 2.9633e-01, PNorm = 48.4369, GNorm = 4.7595, lr_0 = 4.3037e-04
Loss = 3.0733e-01, PNorm = 48.4470, GNorm = 8.2905, lr_0 = 4.3412e-04
Loss = 3.0470e-01, PNorm = 48.4548, GNorm = 1.1924, lr_0 = 4.3788e-04
Loss = 3.1024e-01, PNorm = 48.4663, GNorm = 8.5686, lr_0 = 4.4163e-04
Loss = 3.0503e-01, PNorm = 48.4787, GNorm = 1.3135, lr_0 = 4.4538e-04
Loss = 2.8113e-01, PNorm = 48.4897, GNorm = 2.5142, lr_0 = 4.4912e-04
Loss = 3.1149e-01, PNorm = 48.5014, GNorm = 6.1348, lr_0 = 4.5287e-04
Loss = 2.8617e-01, PNorm = 48.5126, GNorm = 4.7060, lr_0 = 4.5662e-04
Loss = 3.3415e-01, PNorm = 48.5215, GNorm = 6.4868, lr_0 = 4.6038e-04
Loss = 2.9355e-01, PNorm = 48.5352, GNorm = 1.0789, lr_0 = 4.6413e-04
Loss = 2.8725e-01, PNorm = 48.5474, GNorm = 7.3503, lr_0 = 4.6787e-04
Loss = 3.0921e-01, PNorm = 48.5584, GNorm = 3.8876, lr_0 = 4.7162e-04
Loss = 2.8465e-01, PNorm = 48.5727, GNorm = 5.6306, lr_0 = 4.7537e-04
Loss = 2.7251e-01, PNorm = 48.5789, GNorm = 6.5906, lr_0 = 4.7913e-04
Loss = 2.7544e-01, PNorm = 48.5876, GNorm = 9.4107, lr_0 = 4.8288e-04
Loss = 2.8936e-01, PNorm = 48.5942, GNorm = 5.1521, lr_0 = 4.8663e-04
Loss = 2.9158e-01, PNorm = 48.6087, GNorm = 1.9276, lr_0 = 4.9038e-04
Loss = 3.1144e-01, PNorm = 48.6204, GNorm = 4.1253, lr_0 = 4.9412e-04
Loss = 2.9424e-01, PNorm = 48.6288, GNorm = 8.0394, lr_0 = 4.9788e-04
Loss = 3.3001e-01, PNorm = 48.6426, GNorm = 6.4847, lr_0 = 5.0163e-04
Loss = 3.7114e-01, PNorm = 48.6585, GNorm = 6.7732, lr_0 = 5.0538e-04
Loss = 2.8216e-01, PNorm = 48.6738, GNorm = 3.2902, lr_0 = 5.0913e-04
Loss = 2.9806e-01, PNorm = 48.6871, GNorm = 4.1711, lr_0 = 5.1287e-04
Loss = 3.2283e-01, PNorm = 48.6887, GNorm = 5.6562, lr_0 = 5.1663e-04
Loss = 3.6069e-01, PNorm = 48.7006, GNorm = 5.4631, lr_0 = 5.2038e-04
Loss = 2.9689e-01, PNorm = 48.7170, GNorm = 1.2415, lr_0 = 5.2413e-04
Loss = 2.5871e-01, PNorm = 48.7203, GNorm = 4.3666, lr_0 = 5.2788e-04
Loss = 2.6797e-01, PNorm = 48.7325, GNorm = 1.9371, lr_0 = 5.3162e-04
Loss = 2.6029e-01, PNorm = 48.7414, GNorm = 2.2084, lr_0 = 5.3538e-04
Loss = 2.6551e-01, PNorm = 48.7491, GNorm = 1.2184, lr_0 = 5.3912e-04
Loss = 2.9483e-01, PNorm = 48.7579, GNorm = 1.9258, lr_0 = 5.4288e-04
Loss = 2.8658e-01, PNorm = 48.7707, GNorm = 5.4877, lr_0 = 5.4663e-04
Loss = 2.7578e-01, PNorm = 48.7792, GNorm = 1.8410, lr_0 = 5.5038e-04
Validation mae = 0.333884
Epoch 1
Loss = 2.5749e-01, PNorm = 48.7882, GNorm = 2.4099, lr_0 = 5.5413e-04
Loss = 2.7310e-01, PNorm = 48.8012, GNorm = 5.3961, lr_0 = 5.5787e-04
Loss = 3.2392e-01, PNorm = 48.8162, GNorm = 1.1814, lr_0 = 5.6163e-04
Loss = 2.8001e-01, PNorm = 48.8268, GNorm = 3.2651, lr_0 = 5.6538e-04
Loss = 2.7467e-01, PNorm = 48.8362, GNorm = 5.0638, lr_0 = 5.6913e-04
Loss = 2.7969e-01, PNorm = 48.8454, GNorm = 6.1072, lr_0 = 5.7288e-04
Loss = 2.7556e-01, PNorm = 48.8562, GNorm = 6.2643, lr_0 = 5.7662e-04
Loss = 2.4940e-01, PNorm = 48.8679, GNorm = 2.8822, lr_0 = 5.8038e-04
Loss = 2.5497e-01, PNorm = 48.8797, GNorm = 6.8377, lr_0 = 5.8413e-04
Loss = 2.8309e-01, PNorm = 48.8892, GNorm = 1.4629, lr_0 = 5.8788e-04
Loss = 2.9182e-01, PNorm = 48.9031, GNorm = 2.7123, lr_0 = 5.9163e-04
Loss = 2.7708e-01, PNorm = 48.9211, GNorm = 1.7660, lr_0 = 5.9538e-04
Loss = 2.4979e-01, PNorm = 48.9340, GNorm = 1.8973, lr_0 = 5.9913e-04
Loss = 2.7106e-01, PNorm = 48.9490, GNorm = 2.1055, lr_0 = 6.0288e-04
Loss = 2.2514e-01, PNorm = 48.9653, GNorm = 3.0972, lr_0 = 6.0663e-04
Loss = 2.8627e-01, PNorm = 48.9720, GNorm = 6.5646, lr_0 = 6.1038e-04
Loss = 3.0169e-01, PNorm = 48.9915, GNorm = 5.4302, lr_0 = 6.1413e-04
Loss = 2.9638e-01, PNorm = 49.0077, GNorm = 10.6582, lr_0 = 6.1788e-04
Loss = 3.2889e-01, PNorm = 49.0268, GNorm = 4.3582, lr_0 = 6.2163e-04
Loss = 2.8751e-01, PNorm = 49.0460, GNorm = 3.7483, lr_0 = 6.2538e-04
Loss = 2.5742e-01, PNorm = 49.0623, GNorm = 2.2177, lr_0 = 6.2913e-04
Loss = 2.8160e-01, PNorm = 49.0712, GNorm = 0.9035, lr_0 = 6.3288e-04
Loss = 2.9177e-01, PNorm = 49.0806, GNorm = 6.0207, lr_0 = 6.3663e-04
Loss = 2.9630e-01, PNorm = 49.0945, GNorm = 1.2092, lr_0 = 6.4038e-04
Loss = 2.7077e-01, PNorm = 49.1080, GNorm = 4.2606, lr_0 = 6.4413e-04
Loss = 3.0139e-01, PNorm = 49.1223, GNorm = 6.2581, lr_0 = 6.4788e-04
Loss = 2.4963e-01, PNorm = 49.1381, GNorm = 1.9027, lr_0 = 6.5163e-04
Loss = 2.7751e-01, PNorm = 49.1555, GNorm = 6.2757, lr_0 = 6.5538e-04
Loss = 2.6151e-01, PNorm = 49.1728, GNorm = 2.0242, lr_0 = 6.5913e-04
Loss = 2.3173e-01, PNorm = 49.1888, GNorm = 5.3112, lr_0 = 6.6288e-04
Loss = 2.4251e-01, PNorm = 49.1979, GNorm = 1.6659, lr_0 = 6.6663e-04
Loss = 2.4321e-01, PNorm = 49.2061, GNorm = 1.0747, lr_0 = 6.7038e-04
Loss = 2.9933e-01, PNorm = 49.2197, GNorm = 2.6148, lr_0 = 6.7413e-04
Loss = 3.0575e-01, PNorm = 49.2320, GNorm = 12.7370, lr_0 = 6.7788e-04
Loss = 2.6475e-01, PNorm = 49.2494, GNorm = 2.0308, lr_0 = 6.8163e-04
Loss = 2.7425e-01, PNorm = 49.2655, GNorm = 2.5161, lr_0 = 6.8538e-04
Loss = 2.6702e-01, PNorm = 49.2813, GNorm = 2.7527, lr_0 = 6.8913e-04
Loss = 2.8717e-01, PNorm = 49.2903, GNorm = 1.4492, lr_0 = 6.9288e-04
Loss = 2.4271e-01, PNorm = 49.2984, GNorm = 0.8012, lr_0 = 6.9663e-04
Loss = 2.7250e-01, PNorm = 49.3128, GNorm = 1.4455, lr_0 = 7.0038e-04
Loss = 2.7549e-01, PNorm = 49.3361, GNorm = 4.0124, lr_0 = 7.0413e-04
Loss = 3.4576e-01, PNorm = 49.3478, GNorm = 5.7549, lr_0 = 7.0788e-04
Loss = 2.8875e-01, PNorm = 49.3788, GNorm = 1.5497, lr_0 = 7.1163e-04
Loss = 2.4896e-01, PNorm = 49.3944, GNorm = 1.6575, lr_0 = 7.1538e-04
Loss = 3.1672e-01, PNorm = 49.4128, GNorm = 9.0072, lr_0 = 7.1913e-04
Loss = 2.9308e-01, PNorm = 49.4295, GNorm = 3.9745, lr_0 = 7.2288e-04
Loss = 3.3491e-01, PNorm = 49.4502, GNorm = 4.0256, lr_0 = 7.2663e-04
Loss = 2.8224e-01, PNorm = 49.4747, GNorm = 2.9766, lr_0 = 7.3038e-04
Loss = 2.6553e-01, PNorm = 49.4958, GNorm = 4.8582, lr_0 = 7.3413e-04
Loss = 2.6803e-01, PNorm = 49.5145, GNorm = 1.0571, lr_0 = 7.3788e-04
Loss = 2.9661e-01, PNorm = 49.5386, GNorm = 3.7904, lr_0 = 7.4163e-04
Loss = 2.7875e-01, PNorm = 49.5517, GNorm = 2.5196, lr_0 = 7.4538e-04
Loss = 2.6289e-01, PNorm = 49.5758, GNorm = 6.4866, lr_0 = 7.4913e-04
Loss = 2.7767e-01, PNorm = 49.5945, GNorm = 6.6461, lr_0 = 7.5288e-04
Loss = 3.0446e-01, PNorm = 49.6199, GNorm = 5.2062, lr_0 = 7.5663e-04
Loss = 2.7325e-01, PNorm = 49.6399, GNorm = 1.8981, lr_0 = 7.6038e-04
Loss = 2.8676e-01, PNorm = 49.6540, GNorm = 3.2186, lr_0 = 7.6413e-04
Loss = 2.6695e-01, PNorm = 49.6686, GNorm = 1.1465, lr_0 = 7.6788e-04
Loss = 2.4970e-01, PNorm = 49.6846, GNorm = 2.8050, lr_0 = 7.7163e-04
Loss = 2.6141e-01, PNorm = 49.6975, GNorm = 2.7515, lr_0 = 7.7538e-04
Loss = 2.8477e-01, PNorm = 49.7147, GNorm = 4.5517, lr_0 = 7.7913e-04
Loss = 2.7927e-01, PNorm = 49.7329, GNorm = 1.0166, lr_0 = 7.8288e-04
Loss = 2.5946e-01, PNorm = 49.7511, GNorm = 1.1799, lr_0 = 7.8663e-04
Loss = 2.2931e-01, PNorm = 49.7675, GNorm = 4.2038, lr_0 = 7.9038e-04
Loss = 2.4058e-01, PNorm = 49.7815, GNorm = 1.9973, lr_0 = 7.9413e-04
Loss = 2.6408e-01, PNorm = 49.8016, GNorm = 5.1940, lr_0 = 7.9788e-04
Loss = 2.7244e-01, PNorm = 49.8156, GNorm = 1.9249, lr_0 = 8.0163e-04
Loss = 2.4959e-01, PNorm = 49.8394, GNorm = 3.9448, lr_0 = 8.0538e-04
Loss = 2.4236e-01, PNorm = 49.8620, GNorm = 2.6303, lr_0 = 8.0913e-04
Loss = 2.2349e-01, PNorm = 49.8736, GNorm = 0.7457, lr_0 = 8.1288e-04
Loss = 2.4144e-01, PNorm = 49.8927, GNorm = 3.3941, lr_0 = 8.1663e-04
Loss = 2.5444e-01, PNorm = 49.9020, GNorm = 2.0572, lr_0 = 8.2038e-04
Loss = 2.6266e-01, PNorm = 49.9272, GNorm = 1.3671, lr_0 = 8.2413e-04
Loss = 2.1497e-01, PNorm = 49.9431, GNorm = 1.7944, lr_0 = 8.2788e-04
Loss = 2.2833e-01, PNorm = 49.9593, GNorm = 1.8747, lr_0 = 8.3163e-04
Loss = 2.3626e-01, PNorm = 49.9793, GNorm = 2.3995, lr_0 = 8.3538e-04
Loss = 2.8096e-01, PNorm = 49.9888, GNorm = 6.1038, lr_0 = 8.3913e-04
Loss = 2.7405e-01, PNorm = 50.0158, GNorm = 3.2521, lr_0 = 8.4288e-04
Loss = 2.7106e-01, PNorm = 50.0354, GNorm = 2.3588, lr_0 = 8.4663e-04
Loss = 2.5213e-01, PNorm = 50.0607, GNorm = 2.9654, lr_0 = 8.5038e-04
Loss = 2.4245e-01, PNorm = 50.0830, GNorm = 4.1888, lr_0 = 8.5413e-04
Loss = 2.3599e-01, PNorm = 50.0975, GNorm = 1.2339, lr_0 = 8.5788e-04
Loss = 2.6228e-01, PNorm = 50.1191, GNorm = 1.0286, lr_0 = 8.6163e-04
Loss = 2.5557e-01, PNorm = 50.1260, GNorm = 2.0761, lr_0 = 8.6538e-04
Loss = 2.7799e-01, PNorm = 50.1479, GNorm = 3.6334, lr_0 = 8.6913e-04
Loss = 2.6041e-01, PNorm = 50.1752, GNorm = 2.6573, lr_0 = 8.7288e-04
Loss = 2.7586e-01, PNorm = 50.1992, GNorm = 2.6225, lr_0 = 8.7663e-04
Loss = 2.5802e-01, PNorm = 50.2192, GNorm = 4.4298, lr_0 = 8.8038e-04
Loss = 2.4302e-01, PNorm = 50.2434, GNorm = 1.2430, lr_0 = 8.8413e-04
Loss = 3.0439e-01, PNorm = 50.2617, GNorm = 1.7195, lr_0 = 8.8788e-04
Loss = 2.4248e-01, PNorm = 50.2923, GNorm = 1.1649, lr_0 = 8.9163e-04
Loss = 2.5741e-01, PNorm = 50.3057, GNorm = 1.2788, lr_0 = 8.9538e-04
Loss = 2.4130e-01, PNorm = 50.3304, GNorm = 2.3925, lr_0 = 8.9913e-04
Loss = 2.6947e-01, PNorm = 50.3558, GNorm = 4.5902, lr_0 = 9.0288e-04
Loss = 2.4882e-01, PNorm = 50.3800, GNorm = 2.5670, lr_0 = 9.0663e-04
Loss = 2.7198e-01, PNorm = 50.4013, GNorm = 1.4163, lr_0 = 9.1038e-04
Loss = 2.3813e-01, PNorm = 50.4270, GNorm = 3.5166, lr_0 = 9.1413e-04
Loss = 2.5256e-01, PNorm = 50.4416, GNorm = 1.5864, lr_0 = 9.1788e-04
Loss = 2.5838e-01, PNorm = 50.4560, GNorm = 1.2869, lr_0 = 9.2163e-04
Loss = 2.5249e-01, PNorm = 50.4716, GNorm = 1.8237, lr_0 = 9.2538e-04
Loss = 2.7236e-01, PNorm = 50.4844, GNorm = 1.7881, lr_0 = 9.2913e-04
Loss = 2.6893e-01, PNorm = 50.5087, GNorm = 3.8368, lr_0 = 9.3288e-04
Loss = 2.1689e-01, PNorm = 50.5386, GNorm = 2.0168, lr_0 = 9.3663e-04
Loss = 2.9213e-01, PNorm = 50.5598, GNorm = 1.3100, lr_0 = 9.4038e-04
Loss = 2.3247e-01, PNorm = 50.5821, GNorm = 3.7900, lr_0 = 9.4413e-04
Loss = 2.9348e-01, PNorm = 50.6057, GNorm = 1.9472, lr_0 = 9.4788e-04
Loss = 2.4876e-01, PNorm = 50.6395, GNorm = 1.0009, lr_0 = 9.5163e-04
Loss = 2.1870e-01, PNorm = 50.6616, GNorm = 0.8736, lr_0 = 9.5538e-04
Loss = 2.5322e-01, PNorm = 50.6866, GNorm = 2.9274, lr_0 = 9.5913e-04
Loss = 2.3678e-01, PNorm = 50.7066, GNorm = 3.9940, lr_0 = 9.6288e-04
Loss = 2.3621e-01, PNorm = 50.7276, GNorm = 1.5581, lr_0 = 9.6663e-04
Loss = 2.3031e-01, PNorm = 50.7444, GNorm = 1.2070, lr_0 = 9.7038e-04
Loss = 2.8544e-01, PNorm = 50.7721, GNorm = 3.6502, lr_0 = 9.7413e-04
Loss = 2.5029e-01, PNorm = 50.7966, GNorm = 3.5224, lr_0 = 9.7788e-04
Loss = 2.7802e-01, PNorm = 50.8247, GNorm = 0.7384, lr_0 = 9.8163e-04
Loss = 2.3041e-01, PNorm = 50.8570, GNorm = 1.6032, lr_0 = 9.8537e-04
Loss = 2.8588e-01, PNorm = 50.8808, GNorm = 1.3607, lr_0 = 9.8912e-04
Loss = 2.4704e-01, PNorm = 50.9030, GNorm = 1.9727, lr_0 = 9.9288e-04
Loss = 2.2089e-01, PNorm = 50.9240, GNorm = 1.3178, lr_0 = 9.9663e-04
Loss = 2.6716e-01, PNorm = 50.9496, GNorm = 1.3672, lr_0 = 9.9993e-04
Validation mae = 0.337092
Epoch 2
Loss = 2.4594e-01, PNorm = 50.9778, GNorm = 3.0154, lr_0 = 9.9925e-04
Loss = 2.5236e-01, PNorm = 50.9978, GNorm = 2.5506, lr_0 = 9.9856e-04
Loss = 2.5344e-01, PNorm = 51.0307, GNorm = 1.5771, lr_0 = 9.9788e-04
Loss = 2.3225e-01, PNorm = 51.0448, GNorm = 2.1544, lr_0 = 9.9719e-04
Loss = 2.2906e-01, PNorm = 51.0679, GNorm = 1.7515, lr_0 = 9.9651e-04
Loss = 2.8117e-01, PNorm = 51.0912, GNorm = 5.8186, lr_0 = 9.9583e-04
Loss = 2.7338e-01, PNorm = 51.1184, GNorm = 1.0765, lr_0 = 9.9515e-04
Loss = 2.3452e-01, PNorm = 51.1473, GNorm = 1.3346, lr_0 = 9.9446e-04
Loss = 2.2179e-01, PNorm = 51.1806, GNorm = 1.2137, lr_0 = 9.9378e-04
Loss = 2.3201e-01, PNorm = 51.2153, GNorm = 0.8490, lr_0 = 9.9310e-04
Loss = 2.3964e-01, PNorm = 51.2332, GNorm = 0.7636, lr_0 = 9.9242e-04
Loss = 2.3559e-01, PNorm = 51.2594, GNorm = 1.1453, lr_0 = 9.9174e-04
Loss = 2.6496e-01, PNorm = 51.2807, GNorm = 2.9909, lr_0 = 9.9106e-04
Loss = 2.6170e-01, PNorm = 51.3034, GNorm = 3.5759, lr_0 = 9.9038e-04
Loss = 2.5195e-01, PNorm = 51.3361, GNorm = 1.5945, lr_0 = 9.8971e-04
Loss = 2.8541e-01, PNorm = 51.3673, GNorm = 4.8717, lr_0 = 9.8903e-04
Loss = 2.3236e-01, PNorm = 51.4043, GNorm = 1.8703, lr_0 = 9.8835e-04
Loss = 2.5262e-01, PNorm = 51.4264, GNorm = 1.2091, lr_0 = 9.8767e-04
Loss = 2.3680e-01, PNorm = 51.4505, GNorm = 1.1515, lr_0 = 9.8700e-04
Loss = 1.9885e-01, PNorm = 51.4802, GNorm = 0.8637, lr_0 = 9.8632e-04
Loss = 2.3776e-01, PNorm = 51.5069, GNorm = 1.2381, lr_0 = 9.8564e-04
Loss = 1.9951e-01, PNorm = 51.5326, GNorm = 1.8371, lr_0 = 9.8497e-04
Loss = 2.5353e-01, PNorm = 51.5494, GNorm = 0.7349, lr_0 = 9.8429e-04
Loss = 2.7992e-01, PNorm = 51.5861, GNorm = 4.4098, lr_0 = 9.8362e-04
Loss = 2.4097e-01, PNorm = 51.6103, GNorm = 1.0833, lr_0 = 9.8295e-04
Loss = 2.4785e-01, PNorm = 51.6355, GNorm = 2.6022, lr_0 = 9.8227e-04
Loss = 2.0351e-01, PNorm = 51.6548, GNorm = 1.0734, lr_0 = 9.8160e-04
Loss = 2.2916e-01, PNorm = 51.6736, GNorm = 1.5441, lr_0 = 9.8093e-04
Loss = 2.2886e-01, PNorm = 51.7003, GNorm = 1.2044, lr_0 = 9.8026e-04
Loss = 2.3013e-01, PNorm = 51.7322, GNorm = 1.1689, lr_0 = 9.7958e-04
Loss = 2.1833e-01, PNorm = 51.7502, GNorm = 0.9729, lr_0 = 9.7891e-04
Loss = 2.6547e-01, PNorm = 51.7644, GNorm = 1.4607, lr_0 = 9.7824e-04
Loss = 2.5091e-01, PNorm = 51.7953, GNorm = 2.0643, lr_0 = 9.7757e-04
Loss = 2.4606e-01, PNorm = 51.8212, GNorm = 0.9191, lr_0 = 9.7690e-04
Loss = 2.0562e-01, PNorm = 51.8472, GNorm = 2.1388, lr_0 = 9.7623e-04
Loss = 2.4159e-01, PNorm = 51.8737, GNorm = 3.6485, lr_0 = 9.7556e-04
Loss = 2.4315e-01, PNorm = 51.8952, GNorm = 4.0388, lr_0 = 9.7490e-04
Loss = 2.2820e-01, PNorm = 51.9200, GNorm = 1.5981, lr_0 = 9.7423e-04
Loss = 2.8147e-01, PNorm = 51.9638, GNorm = 4.1188, lr_0 = 9.7356e-04
Loss = 2.4944e-01, PNorm = 52.0073, GNorm = 1.1271, lr_0 = 9.7289e-04
Loss = 2.2038e-01, PNorm = 52.0377, GNorm = 1.0190, lr_0 = 9.7223e-04
Loss = 2.1797e-01, PNorm = 52.0597, GNorm = 1.0000, lr_0 = 9.7156e-04
Loss = 1.8960e-01, PNorm = 52.0860, GNorm = 0.6714, lr_0 = 9.7090e-04
Loss = 2.1445e-01, PNorm = 52.1084, GNorm = 2.3512, lr_0 = 9.7023e-04
Loss = 2.2133e-01, PNorm = 52.1293, GNorm = 1.6897, lr_0 = 9.6957e-04
Loss = 2.3540e-01, PNorm = 52.1586, GNorm = 0.7524, lr_0 = 9.6890e-04
Loss = 2.2417e-01, PNorm = 52.1779, GNorm = 0.9917, lr_0 = 9.6824e-04
Loss = 2.2062e-01, PNorm = 52.2010, GNorm = 1.0039, lr_0 = 9.6757e-04
Loss = 2.2203e-01, PNorm = 52.2230, GNorm = 1.8240, lr_0 = 9.6691e-04
Loss = 2.1464e-01, PNorm = 52.2422, GNorm = 2.5308, lr_0 = 9.6625e-04
Loss = 2.3417e-01, PNorm = 52.2658, GNorm = 1.2130, lr_0 = 9.6559e-04
Loss = 1.9319e-01, PNorm = 52.2862, GNorm = 0.6879, lr_0 = 9.6493e-04
Loss = 2.1990e-01, PNorm = 52.3092, GNorm = 0.9816, lr_0 = 9.6427e-04
Loss = 2.1364e-01, PNorm = 52.3315, GNorm = 1.9133, lr_0 = 9.6360e-04
Loss = 2.3963e-01, PNorm = 52.3605, GNorm = 2.4128, lr_0 = 9.6294e-04
Loss = 2.1909e-01, PNorm = 52.3892, GNorm = 2.9331, lr_0 = 9.6228e-04
Loss = 2.3143e-01, PNorm = 52.4100, GNorm = 3.4380, lr_0 = 9.6163e-04
Loss = 2.5596e-01, PNorm = 52.4354, GNorm = 1.7094, lr_0 = 9.6097e-04
Loss = 2.2793e-01, PNorm = 52.4785, GNorm = 2.5319, lr_0 = 9.6031e-04
Loss = 2.5356e-01, PNorm = 52.5038, GNorm = 3.7249, lr_0 = 9.5965e-04
Loss = 2.1224e-01, PNorm = 52.5261, GNorm = 1.3823, lr_0 = 9.5899e-04
Loss = 2.2854e-01, PNorm = 52.5506, GNorm = 0.6789, lr_0 = 9.5834e-04
Loss = 2.4503e-01, PNorm = 52.5830, GNorm = 1.4949, lr_0 = 9.5768e-04
Loss = 2.1060e-01, PNorm = 52.6085, GNorm = 0.7981, lr_0 = 9.5702e-04
Loss = 2.2455e-01, PNorm = 52.6344, GNorm = 1.0667, lr_0 = 9.5637e-04
Loss = 2.8317e-01, PNorm = 52.6550, GNorm = 3.1492, lr_0 = 9.5571e-04
Loss = 2.7527e-01, PNorm = 52.6894, GNorm = 1.5640, lr_0 = 9.5506e-04
Loss = 2.5466e-01, PNorm = 52.7265, GNorm = 1.0496, lr_0 = 9.5440e-04
Loss = 2.5838e-01, PNorm = 52.7549, GNorm = 1.2522, lr_0 = 9.5375e-04
Loss = 2.3785e-01, PNorm = 52.7834, GNorm = 0.9615, lr_0 = 9.5310e-04
Loss = 2.1376e-01, PNorm = 52.8168, GNorm = 1.8421, lr_0 = 9.5244e-04
Loss = 2.6174e-01, PNorm = 52.8447, GNorm = 1.8775, lr_0 = 9.5179e-04
Loss = 2.3480e-01, PNorm = 52.8712, GNorm = 0.8109, lr_0 = 9.5114e-04
Loss = 2.4282e-01, PNorm = 52.8945, GNorm = 3.9180, lr_0 = 9.5049e-04
Loss = 2.4803e-01, PNorm = 52.9140, GNorm = 2.2419, lr_0 = 9.4984e-04
Loss = 2.3813e-01, PNorm = 52.9463, GNorm = 1.0988, lr_0 = 9.4919e-04
Loss = 2.0649e-01, PNorm = 52.9777, GNorm = 2.4433, lr_0 = 9.4854e-04
Loss = 2.4772e-01, PNorm = 53.0013, GNorm = 1.4988, lr_0 = 9.4789e-04
Loss = 2.4019e-01, PNorm = 53.0324, GNorm = 0.9521, lr_0 = 9.4724e-04
Loss = 2.2634e-01, PNorm = 53.0613, GNorm = 0.7573, lr_0 = 9.4659e-04
Loss = 2.3198e-01, PNorm = 53.0853, GNorm = 1.8176, lr_0 = 9.4594e-04
Loss = 2.3548e-01, PNorm = 53.1150, GNorm = 1.0814, lr_0 = 9.4529e-04
Loss = 2.2522e-01, PNorm = 53.1381, GNorm = 1.1060, lr_0 = 9.4464e-04
Loss = 2.3366e-01, PNorm = 53.1698, GNorm = 1.1594, lr_0 = 9.4400e-04
Loss = 2.1133e-01, PNorm = 53.1857, GNorm = 1.9277, lr_0 = 9.4335e-04
Loss = 2.2525e-01, PNorm = 53.2147, GNorm = 1.3192, lr_0 = 9.4270e-04
Loss = 2.2684e-01, PNorm = 53.2504, GNorm = 0.7962, lr_0 = 9.4206e-04
Loss = 2.0040e-01, PNorm = 53.2754, GNorm = 1.6741, lr_0 = 9.4141e-04
Loss = 2.2076e-01, PNorm = 53.3004, GNorm = 3.0448, lr_0 = 9.4077e-04
Loss = 2.1989e-01, PNorm = 53.3149, GNorm = 0.8914, lr_0 = 9.4012e-04
Loss = 2.1097e-01, PNorm = 53.3370, GNorm = 0.8230, lr_0 = 9.3948e-04
Loss = 2.3202e-01, PNorm = 53.3658, GNorm = 1.4258, lr_0 = 9.3884e-04
Loss = 2.5747e-01, PNorm = 53.3878, GNorm = 1.0892, lr_0 = 9.3819e-04
Loss = 2.3313e-01, PNorm = 53.4198, GNorm = 1.8984, lr_0 = 9.3755e-04
Loss = 2.2611e-01, PNorm = 53.4434, GNorm = 2.3728, lr_0 = 9.3691e-04
Loss = 2.0645e-01, PNorm = 53.4618, GNorm = 0.9032, lr_0 = 9.3627e-04
Loss = 2.1494e-01, PNorm = 53.4869, GNorm = 0.9038, lr_0 = 9.3562e-04
Loss = 1.9712e-01, PNorm = 53.5141, GNorm = 0.8382, lr_0 = 9.3498e-04
Loss = 2.2561e-01, PNorm = 53.5468, GNorm = 1.6796, lr_0 = 9.3434e-04
Loss = 2.1225e-01, PNorm = 53.5785, GNorm = 1.0748, lr_0 = 9.3370e-04
Loss = 2.1939e-01, PNorm = 53.6030, GNorm = 1.4774, lr_0 = 9.3306e-04
Loss = 2.1724e-01, PNorm = 53.6344, GNorm = 2.0206, lr_0 = 9.3242e-04
Loss = 2.1200e-01, PNorm = 53.6527, GNorm = 2.5121, lr_0 = 9.3178e-04
Loss = 2.1282e-01, PNorm = 53.6789, GNorm = 1.2125, lr_0 = 9.3115e-04
Loss = 2.2396e-01, PNorm = 53.7054, GNorm = 1.5825, lr_0 = 9.3051e-04
Loss = 2.0269e-01, PNorm = 53.7260, GNorm = 0.7909, lr_0 = 9.2987e-04
Loss = 2.2127e-01, PNorm = 53.7525, GNorm = 2.4932, lr_0 = 9.2923e-04
Loss = 2.2920e-01, PNorm = 53.7822, GNorm = 0.7713, lr_0 = 9.2860e-04
Loss = 2.0581e-01, PNorm = 53.8144, GNorm = 2.2594, lr_0 = 9.2796e-04
Loss = 2.5676e-01, PNorm = 53.8414, GNorm = 2.0709, lr_0 = 9.2733e-04
Loss = 2.6939e-01, PNorm = 53.8767, GNorm = 2.8112, lr_0 = 9.2669e-04
Loss = 2.5735e-01, PNorm = 53.9171, GNorm = 0.7879, lr_0 = 9.2606e-04
Loss = 2.3187e-01, PNorm = 53.9378, GNorm = 0.7967, lr_0 = 9.2542e-04
Loss = 2.1703e-01, PNorm = 53.9661, GNorm = 2.4313, lr_0 = 9.2479e-04
Loss = 2.3417e-01, PNorm = 53.9878, GNorm = 0.8257, lr_0 = 9.2415e-04
Loss = 2.1226e-01, PNorm = 54.0097, GNorm = 0.5862, lr_0 = 9.2352e-04
Loss = 2.1383e-01, PNorm = 54.0270, GNorm = 1.4617, lr_0 = 9.2289e-04
Loss = 2.0349e-01, PNorm = 54.0468, GNorm = 0.9523, lr_0 = 9.2226e-04
Loss = 2.1101e-01, PNorm = 54.0614, GNorm = 0.5493, lr_0 = 9.2162e-04
Loss = 2.2924e-01, PNorm = 54.0845, GNorm = 3.1174, lr_0 = 9.2099e-04
Validation mae = 0.269219
Epoch 3
Loss = 1.9758e-01, PNorm = 54.1075, GNorm = 2.1573, lr_0 = 9.2036e-04
Loss = 2.0895e-01, PNorm = 54.1361, GNorm = 2.9642, lr_0 = 9.1973e-04
Loss = 2.7857e-01, PNorm = 54.1719, GNorm = 1.7577, lr_0 = 9.1910e-04
Loss = 2.2508e-01, PNorm = 54.2001, GNorm = 1.5735, lr_0 = 9.1847e-04
Loss = 2.0013e-01, PNorm = 54.2198, GNorm = 0.7918, lr_0 = 9.1784e-04
Loss = 1.9151e-01, PNorm = 54.2398, GNorm = 1.0673, lr_0 = 9.1721e-04
Loss = 2.1988e-01, PNorm = 54.2682, GNorm = 2.2650, lr_0 = 9.1658e-04
Loss = 1.9096e-01, PNorm = 54.2954, GNorm = 0.6394, lr_0 = 9.1596e-04
Loss = 1.8402e-01, PNorm = 54.3203, GNorm = 1.7128, lr_0 = 9.1533e-04
Loss = 1.9902e-01, PNorm = 54.3346, GNorm = 1.1324, lr_0 = 9.1470e-04
Loss = 1.9574e-01, PNorm = 54.3557, GNorm = 2.1371, lr_0 = 9.1408e-04
Loss = 1.9438e-01, PNorm = 54.3785, GNorm = 1.2206, lr_0 = 9.1345e-04
Loss = 2.0121e-01, PNorm = 54.4090, GNorm = 2.2692, lr_0 = 9.1282e-04
Loss = 2.0216e-01, PNorm = 54.4308, GNorm = 1.0923, lr_0 = 9.1220e-04
Loss = 2.0246e-01, PNorm = 54.4422, GNorm = 1.1865, lr_0 = 9.1157e-04
Loss = 2.1115e-01, PNorm = 54.4710, GNorm = 3.0589, lr_0 = 9.1095e-04
Loss = 2.0358e-01, PNorm = 54.4931, GNorm = 1.2336, lr_0 = 9.1032e-04
Loss = 1.9416e-01, PNorm = 54.5164, GNorm = 0.7606, lr_0 = 9.0970e-04
Loss = 2.2288e-01, PNorm = 54.5406, GNorm = 1.0076, lr_0 = 9.0908e-04
Loss = 2.2247e-01, PNorm = 54.5684, GNorm = 1.4722, lr_0 = 9.0846e-04
Loss = 1.7602e-01, PNorm = 54.5978, GNorm = 0.5434, lr_0 = 9.0783e-04
Loss = 2.0584e-01, PNorm = 54.6273, GNorm = 1.9924, lr_0 = 9.0721e-04
Loss = 2.5318e-01, PNorm = 54.6637, GNorm = 1.0201, lr_0 = 9.0659e-04
Loss = 2.4375e-01, PNorm = 54.7000, GNorm = 1.9321, lr_0 = 9.0597e-04
Loss = 2.2745e-01, PNorm = 54.7379, GNorm = 0.6777, lr_0 = 9.0535e-04
Loss = 2.0373e-01, PNorm = 54.7685, GNorm = 1.3869, lr_0 = 9.0473e-04
Loss = 2.3040e-01, PNorm = 54.7951, GNorm = 2.4548, lr_0 = 9.0411e-04
Loss = 2.3618e-01, PNorm = 54.8268, GNorm = 1.9580, lr_0 = 9.0349e-04
Loss = 2.1017e-01, PNorm = 54.8603, GNorm = 0.8020, lr_0 = 9.0287e-04
Loss = 1.9644e-01, PNorm = 54.8820, GNorm = 1.4015, lr_0 = 9.0225e-04
Loss = 2.1297e-01, PNorm = 54.9138, GNorm = 2.4797, lr_0 = 9.0163e-04
Loss = 1.9029e-01, PNorm = 54.9442, GNorm = 0.7444, lr_0 = 9.0102e-04
Loss = 2.3731e-01, PNorm = 54.9792, GNorm = 1.2298, lr_0 = 9.0040e-04
Loss = 2.2382e-01, PNorm = 55.0130, GNorm = 1.8575, lr_0 = 8.9978e-04
Loss = 1.8832e-01, PNorm = 55.0336, GNorm = 0.8536, lr_0 = 8.9916e-04
Loss = 1.9863e-01, PNorm = 55.0552, GNorm = 1.0249, lr_0 = 8.9855e-04
Loss = 1.9549e-01, PNorm = 55.0750, GNorm = 2.9715, lr_0 = 8.9793e-04
Loss = 1.9449e-01, PNorm = 55.0999, GNorm = 1.2299, lr_0 = 8.9732e-04
Loss = 1.8933e-01, PNorm = 55.1217, GNorm = 0.8195, lr_0 = 8.9670e-04
Loss = 2.0439e-01, PNorm = 55.1538, GNorm = 2.0936, lr_0 = 8.9609e-04
Loss = 2.1278e-01, PNorm = 55.1891, GNorm = 0.6378, lr_0 = 8.9548e-04
Loss = 2.5051e-01, PNorm = 55.2202, GNorm = 2.3903, lr_0 = 8.9486e-04
Loss = 2.3143e-01, PNorm = 55.2515, GNorm = 1.7682, lr_0 = 8.9425e-04
Loss = 2.3311e-01, PNorm = 55.2790, GNorm = 0.8551, lr_0 = 8.9364e-04
Loss = 1.8185e-01, PNorm = 55.3058, GNorm = 1.3546, lr_0 = 8.9302e-04
Loss = 1.9290e-01, PNorm = 55.3315, GNorm = 0.7123, lr_0 = 8.9241e-04
Loss = 1.8431e-01, PNorm = 55.3440, GNorm = 0.9159, lr_0 = 8.9180e-04
Loss = 2.2311e-01, PNorm = 55.3672, GNorm = 2.8022, lr_0 = 8.9119e-04
Loss = 1.9535e-01, PNorm = 55.3996, GNorm = 1.2807, lr_0 = 8.9058e-04
Loss = 2.3417e-01, PNorm = 55.4214, GNorm = 0.7145, lr_0 = 8.8997e-04
Loss = 2.1034e-01, PNorm = 55.4606, GNorm = 0.8552, lr_0 = 8.8936e-04
Loss = 2.1762e-01, PNorm = 55.4845, GNorm = 1.7236, lr_0 = 8.8875e-04
Loss = 2.1891e-01, PNorm = 55.5129, GNorm = 1.7545, lr_0 = 8.8814e-04
Loss = 2.2182e-01, PNorm = 55.5247, GNorm = 1.7921, lr_0 = 8.8753e-04
Loss = 2.2871e-01, PNorm = 55.5568, GNorm = 0.7727, lr_0 = 8.8693e-04
Loss = 1.9752e-01, PNorm = 55.5916, GNorm = 0.7075, lr_0 = 8.8632e-04
Loss = 2.1043e-01, PNorm = 55.6159, GNorm = 1.0476, lr_0 = 8.8571e-04
Loss = 2.0401e-01, PNorm = 55.6402, GNorm = 1.4512, lr_0 = 8.8510e-04
Loss = 1.9649e-01, PNorm = 55.6600, GNorm = 0.6054, lr_0 = 8.8450e-04
Loss = 1.8971e-01, PNorm = 55.6814, GNorm = 0.7647, lr_0 = 8.8389e-04
Loss = 1.9118e-01, PNorm = 55.7084, GNorm = 0.8086, lr_0 = 8.8329e-04
Loss = 1.9918e-01, PNorm = 55.7334, GNorm = 3.6597, lr_0 = 8.8268e-04
Loss = 1.8944e-01, PNorm = 55.7558, GNorm = 1.2400, lr_0 = 8.8208e-04
Loss = 2.2621e-01, PNorm = 55.7808, GNorm = 1.3372, lr_0 = 8.8147e-04
Loss = 1.9044e-01, PNorm = 55.8073, GNorm = 2.3230, lr_0 = 8.8087e-04
Loss = 1.8999e-01, PNorm = 55.8384, GNorm = 0.7263, lr_0 = 8.8026e-04
Loss = 2.0251e-01, PNorm = 55.8580, GNorm = 0.6782, lr_0 = 8.7966e-04
Loss = 2.0013e-01, PNorm = 55.8879, GNorm = 0.7764, lr_0 = 8.7906e-04
Loss = 2.2001e-01, PNorm = 55.9165, GNorm = 2.0176, lr_0 = 8.7846e-04
Loss = 2.1326e-01, PNorm = 55.9497, GNorm = 1.2670, lr_0 = 8.7785e-04
Loss = 2.2060e-01, PNorm = 55.9796, GNorm = 2.9905, lr_0 = 8.7725e-04
Loss = 1.9738e-01, PNorm = 56.0029, GNorm = 0.5553, lr_0 = 8.7665e-04
Loss = 2.0641e-01, PNorm = 56.0275, GNorm = 2.1678, lr_0 = 8.7605e-04
Loss = 2.1321e-01, PNorm = 56.0562, GNorm = 0.6592, lr_0 = 8.7545e-04
Loss = 1.9130e-01, PNorm = 56.0860, GNorm = 2.0704, lr_0 = 8.7485e-04
Loss = 1.9203e-01, PNorm = 56.1124, GNorm = 1.2877, lr_0 = 8.7425e-04
Loss = 2.0342e-01, PNorm = 56.1379, GNorm = 0.8190, lr_0 = 8.7365e-04
Loss = 1.9452e-01, PNorm = 56.1663, GNorm = 0.9214, lr_0 = 8.7306e-04
Loss = 1.6829e-01, PNorm = 56.1907, GNorm = 0.8967, lr_0 = 8.7246e-04
Loss = 1.8907e-01, PNorm = 56.2199, GNorm = 1.5043, lr_0 = 8.7186e-04
Loss = 1.7816e-01, PNorm = 56.2453, GNorm = 2.4521, lr_0 = 8.7126e-04
Loss = 1.9808e-01, PNorm = 56.2736, GNorm = 1.5297, lr_0 = 8.7067e-04
Loss = 2.0309e-01, PNorm = 56.3042, GNorm = 0.9304, lr_0 = 8.7007e-04
Loss = 1.9463e-01, PNorm = 56.3256, GNorm = 1.8624, lr_0 = 8.6947e-04
Loss = 2.0995e-01, PNorm = 56.3491, GNorm = 1.4476, lr_0 = 8.6888e-04
Loss = 2.4588e-01, PNorm = 56.3784, GNorm = 1.3310, lr_0 = 8.6828e-04
Loss = 1.9515e-01, PNorm = 56.4035, GNorm = 1.6883, lr_0 = 8.6769e-04
Loss = 2.1665e-01, PNorm = 56.4302, GNorm = 1.0263, lr_0 = 8.6709e-04
Loss = 2.0675e-01, PNorm = 56.4603, GNorm = 0.9492, lr_0 = 8.6650e-04
Loss = 2.2081e-01, PNorm = 56.4807, GNorm = 0.9500, lr_0 = 8.6590e-04
Loss = 2.3299e-01, PNorm = 56.5110, GNorm = 0.9120, lr_0 = 8.6531e-04
Loss = 1.9689e-01, PNorm = 56.5348, GNorm = 0.9564, lr_0 = 8.6472e-04
Loss = 2.2398e-01, PNorm = 56.5539, GNorm = 2.6130, lr_0 = 8.6413e-04
Loss = 1.7522e-01, PNorm = 56.5837, GNorm = 0.9437, lr_0 = 8.6353e-04
Loss = 1.8991e-01, PNorm = 56.6072, GNorm = 0.8515, lr_0 = 8.6294e-04
Loss = 1.8302e-01, PNorm = 56.6328, GNorm = 1.1084, lr_0 = 8.6235e-04
Loss = 2.0105e-01, PNorm = 56.6506, GNorm = 0.9473, lr_0 = 8.6176e-04
Loss = 2.0460e-01, PNorm = 56.6771, GNorm = 0.8883, lr_0 = 8.6117e-04
Loss = 2.0776e-01, PNorm = 56.7009, GNorm = 1.7643, lr_0 = 8.6058e-04
Loss = 1.9097e-01, PNorm = 56.7364, GNorm = 1.6496, lr_0 = 8.5999e-04
Loss = 2.2756e-01, PNorm = 56.7547, GNorm = 0.6539, lr_0 = 8.5940e-04
Loss = 1.9596e-01, PNorm = 56.7899, GNorm = 1.3505, lr_0 = 8.5881e-04
Loss = 2.1055e-01, PNorm = 56.8203, GNorm = 1.2043, lr_0 = 8.5823e-04
Loss = 1.6355e-01, PNorm = 56.8459, GNorm = 2.0418, lr_0 = 8.5764e-04
Loss = 2.1169e-01, PNorm = 56.8690, GNorm = 0.7127, lr_0 = 8.5705e-04
Loss = 2.0680e-01, PNorm = 56.8998, GNorm = 0.9393, lr_0 = 8.5646e-04
Loss = 1.9803e-01, PNorm = 56.9200, GNorm = 0.9260, lr_0 = 8.5588e-04
Loss = 2.0667e-01, PNorm = 56.9426, GNorm = 2.9532, lr_0 = 8.5529e-04
Loss = 2.2079e-01, PNorm = 56.9613, GNorm = 0.7083, lr_0 = 8.5470e-04
Loss = 1.9475e-01, PNorm = 56.9850, GNorm = 0.7102, lr_0 = 8.5412e-04
Loss = 1.7625e-01, PNorm = 57.0055, GNorm = 0.9840, lr_0 = 8.5353e-04
Loss = 1.9061e-01, PNorm = 57.0318, GNorm = 1.5056, lr_0 = 8.5295e-04
Loss = 1.8521e-01, PNorm = 57.0525, GNorm = 0.8280, lr_0 = 8.5236e-04
Loss = 1.9191e-01, PNorm = 57.0722, GNorm = 1.2604, lr_0 = 8.5178e-04
Loss = 1.7800e-01, PNorm = 57.0917, GNorm = 1.5321, lr_0 = 8.5120e-04
Loss = 1.7880e-01, PNorm = 57.1110, GNorm = 1.4774, lr_0 = 8.5061e-04
Loss = 1.9843e-01, PNorm = 57.1390, GNorm = 1.2235, lr_0 = 8.5003e-04
Loss = 1.9309e-01, PNorm = 57.1690, GNorm = 1.0613, lr_0 = 8.4945e-04
Loss = 2.0248e-01, PNorm = 57.1978, GNorm = 0.9973, lr_0 = 8.4887e-04
Loss = 1.9720e-01, PNorm = 57.2113, GNorm = 0.9564, lr_0 = 8.4828e-04
Validation mae = 0.264455
Epoch 4
Loss = 1.9699e-01, PNorm = 57.2389, GNorm = 0.8581, lr_0 = 8.4770e-04
Loss = 1.6709e-01, PNorm = 57.2638, GNorm = 0.9302, lr_0 = 8.4712e-04
Loss = 1.8467e-01, PNorm = 57.2827, GNorm = 0.8526, lr_0 = 8.4654e-04
Loss = 1.9686e-01, PNorm = 57.3122, GNorm = 2.1782, lr_0 = 8.4596e-04
Loss = 1.6590e-01, PNorm = 57.3366, GNorm = 0.6959, lr_0 = 8.4538e-04
Loss = 1.7616e-01, PNorm = 57.3690, GNorm = 1.0727, lr_0 = 8.4480e-04
Loss = 1.9387e-01, PNorm = 57.3891, GNorm = 1.6075, lr_0 = 8.4423e-04
Loss = 1.8742e-01, PNorm = 57.4085, GNorm = 1.1304, lr_0 = 8.4365e-04
Loss = 1.9745e-01, PNorm = 57.4409, GNorm = 1.0081, lr_0 = 8.4307e-04
Loss = 1.9874e-01, PNorm = 57.4722, GNorm = 1.6577, lr_0 = 8.4249e-04
Loss = 1.8613e-01, PNorm = 57.5075, GNorm = 0.7391, lr_0 = 8.4191e-04
Loss = 2.0688e-01, PNorm = 57.5410, GNorm = 1.2272, lr_0 = 8.4134e-04
Loss = 1.7404e-01, PNorm = 57.5700, GNorm = 1.1066, lr_0 = 8.4076e-04
Loss = 1.9606e-01, PNorm = 57.5967, GNorm = 1.2879, lr_0 = 8.4019e-04
Loss = 1.7810e-01, PNorm = 57.6197, GNorm = 0.6201, lr_0 = 8.3961e-04
Loss = 1.8142e-01, PNorm = 57.6409, GNorm = 1.4798, lr_0 = 8.3903e-04
Loss = 1.7003e-01, PNorm = 57.6575, GNorm = 1.4367, lr_0 = 8.3846e-04
Loss = 1.9746e-01, PNorm = 57.6833, GNorm = 1.0415, lr_0 = 8.3789e-04
Loss = 1.8561e-01, PNorm = 57.7012, GNorm = 1.8693, lr_0 = 8.3731e-04
Loss = 2.1055e-01, PNorm = 57.7246, GNorm = 0.8188, lr_0 = 8.3674e-04
Loss = 1.8530e-01, PNorm = 57.7515, GNorm = 0.7283, lr_0 = 8.3616e-04
Loss = 1.7675e-01, PNorm = 57.7839, GNorm = 0.4995, lr_0 = 8.3559e-04
Loss = 1.8389e-01, PNorm = 57.8083, GNorm = 0.8946, lr_0 = 8.3502e-04
Loss = 1.8155e-01, PNorm = 57.8383, GNorm = 0.9377, lr_0 = 8.3445e-04
Loss = 1.9846e-01, PNorm = 57.8657, GNorm = 1.2319, lr_0 = 8.3388e-04
Loss = 1.6891e-01, PNorm = 57.8872, GNorm = 0.8622, lr_0 = 8.3330e-04
Loss = 1.9334e-01, PNorm = 57.9176, GNorm = 1.3037, lr_0 = 8.3273e-04
Loss = 2.1130e-01, PNorm = 57.9320, GNorm = 1.2095, lr_0 = 8.3216e-04
Loss = 2.0881e-01, PNorm = 57.9557, GNorm = 1.2960, lr_0 = 8.3159e-04
Loss = 1.9852e-01, PNorm = 57.9916, GNorm = 1.4104, lr_0 = 8.3102e-04
Loss = 1.9613e-01, PNorm = 58.0194, GNorm = 0.8684, lr_0 = 8.3045e-04
Loss = 1.9627e-01, PNorm = 58.0449, GNorm = 0.7327, lr_0 = 8.2988e-04
Loss = 1.4603e-01, PNorm = 58.0719, GNorm = 0.8587, lr_0 = 8.2932e-04
Loss = 1.9036e-01, PNorm = 58.0963, GNorm = 0.9362, lr_0 = 8.2875e-04
Loss = 1.8579e-01, PNorm = 58.1223, GNorm = 0.8279, lr_0 = 8.2818e-04
Loss = 1.7262e-01, PNorm = 58.1433, GNorm = 1.8381, lr_0 = 8.2761e-04
Loss = 2.1429e-01, PNorm = 58.1632, GNorm = 0.9484, lr_0 = 8.2705e-04
Loss = 2.0660e-01, PNorm = 58.1879, GNorm = 0.6403, lr_0 = 8.2648e-04
Loss = 1.9235e-01, PNorm = 58.2125, GNorm = 1.3169, lr_0 = 8.2591e-04
Loss = 1.7079e-01, PNorm = 58.2362, GNorm = 0.7279, lr_0 = 8.2535e-04
Loss = 1.9038e-01, PNorm = 58.2499, GNorm = 0.8904, lr_0 = 8.2478e-04
Loss = 1.8182e-01, PNorm = 58.2753, GNorm = 1.5739, lr_0 = 8.2422e-04
Loss = 2.1181e-01, PNorm = 58.2997, GNorm = 0.8136, lr_0 = 8.2365e-04
Loss = 1.8708e-01, PNorm = 58.3311, GNorm = 1.5070, lr_0 = 8.2309e-04
Loss = 1.9275e-01, PNorm = 58.3585, GNorm = 0.8400, lr_0 = 8.2252e-04
Loss = 1.7554e-01, PNorm = 58.3827, GNorm = 1.3231, lr_0 = 8.2196e-04
Loss = 1.7807e-01, PNorm = 58.4089, GNorm = 0.6823, lr_0 = 8.2140e-04
Loss = 2.1457e-01, PNorm = 58.4390, GNorm = 0.6823, lr_0 = 8.2084e-04
Loss = 1.9084e-01, PNorm = 58.4693, GNorm = 1.1939, lr_0 = 8.2027e-04
Loss = 1.7950e-01, PNorm = 58.4998, GNorm = 0.5766, lr_0 = 8.1971e-04
Loss = 1.8465e-01, PNorm = 58.5262, GNorm = 2.3296, lr_0 = 8.1915e-04
Loss = 1.6845e-01, PNorm = 58.5505, GNorm = 0.7371, lr_0 = 8.1859e-04
Loss = 1.7895e-01, PNorm = 58.5763, GNorm = 0.5461, lr_0 = 8.1803e-04
Loss = 2.0847e-01, PNorm = 58.6079, GNorm = 1.8293, lr_0 = 8.1747e-04
Loss = 1.8895e-01, PNorm = 58.6368, GNorm = 1.3999, lr_0 = 8.1691e-04
Loss = 2.0161e-01, PNorm = 58.6689, GNorm = 1.1049, lr_0 = 8.1635e-04
Loss = 1.9072e-01, PNorm = 58.6970, GNorm = 1.3411, lr_0 = 8.1579e-04
Loss = 2.1078e-01, PNorm = 58.7232, GNorm = 1.9133, lr_0 = 8.1523e-04
Loss = 1.9923e-01, PNorm = 58.7562, GNorm = 1.6858, lr_0 = 8.1467e-04
Loss = 1.7355e-01, PNorm = 58.7839, GNorm = 0.7678, lr_0 = 8.1411e-04
Loss = 1.7581e-01, PNorm = 58.8132, GNorm = 0.5193, lr_0 = 8.1355e-04
Loss = 1.7623e-01, PNorm = 58.8458, GNorm = 0.8625, lr_0 = 8.1300e-04
Loss = 1.6661e-01, PNorm = 58.8674, GNorm = 0.9139, lr_0 = 8.1244e-04
Loss = 1.7584e-01, PNorm = 58.8865, GNorm = 0.7477, lr_0 = 8.1188e-04
Loss = 1.9895e-01, PNorm = 58.9084, GNorm = 0.9368, lr_0 = 8.1133e-04
Loss = 1.6944e-01, PNorm = 58.9312, GNorm = 0.8017, lr_0 = 8.1077e-04
Loss = 1.8302e-01, PNorm = 58.9575, GNorm = 0.5916, lr_0 = 8.1022e-04
Loss = 1.6679e-01, PNorm = 58.9920, GNorm = 1.3361, lr_0 = 8.0966e-04
Loss = 1.6105e-01, PNorm = 59.0227, GNorm = 1.2280, lr_0 = 8.0911e-04
Loss = 1.7259e-01, PNorm = 59.0362, GNorm = 0.7081, lr_0 = 8.0855e-04
Loss = 1.9697e-01, PNorm = 59.0695, GNorm = 2.1305, lr_0 = 8.0800e-04
Loss = 1.6708e-01, PNorm = 59.0911, GNorm = 0.8928, lr_0 = 8.0745e-04
Loss = 1.7534e-01, PNorm = 59.1135, GNorm = 0.6550, lr_0 = 8.0689e-04
Loss = 1.9871e-01, PNorm = 59.1290, GNorm = 1.1226, lr_0 = 8.0634e-04
Loss = 1.9022e-01, PNorm = 59.1492, GNorm = 0.6234, lr_0 = 8.0579e-04
Loss = 1.7215e-01, PNorm = 59.1730, GNorm = 1.3518, lr_0 = 8.0523e-04
Loss = 1.9076e-01, PNorm = 59.1909, GNorm = 0.7195, lr_0 = 8.0468e-04
Loss = 1.8666e-01, PNorm = 59.2159, GNorm = 1.3590, lr_0 = 8.0413e-04
Loss = 1.7627e-01, PNorm = 59.2474, GNorm = 1.2068, lr_0 = 8.0358e-04
Loss = 1.5110e-01, PNorm = 59.2719, GNorm = 0.5760, lr_0 = 8.0303e-04
Loss = 1.8484e-01, PNorm = 59.2978, GNorm = 1.1071, lr_0 = 8.0248e-04
Loss = 2.0043e-01, PNorm = 59.3154, GNorm = 1.1913, lr_0 = 8.0193e-04
Loss = 1.7556e-01, PNorm = 59.3430, GNorm = 0.6858, lr_0 = 8.0138e-04
Loss = 2.0019e-01, PNorm = 59.3678, GNorm = 1.0971, lr_0 = 8.0083e-04
Loss = 1.7127e-01, PNorm = 59.3933, GNorm = 0.6509, lr_0 = 8.0028e-04
Loss = 1.9754e-01, PNorm = 59.4176, GNorm = 1.2225, lr_0 = 7.9974e-04
Loss = 1.7902e-01, PNorm = 59.4391, GNorm = 0.7823, lr_0 = 7.9919e-04
Loss = 1.8122e-01, PNorm = 59.4587, GNorm = 1.2948, lr_0 = 7.9864e-04
Loss = 1.6976e-01, PNorm = 59.4762, GNorm = 0.7667, lr_0 = 7.9809e-04
Loss = 1.9290e-01, PNorm = 59.5020, GNorm = 0.9123, lr_0 = 7.9755e-04
Loss = 1.7952e-01, PNorm = 59.5260, GNorm = 0.8519, lr_0 = 7.9700e-04
Loss = 1.9415e-01, PNorm = 59.5489, GNorm = 0.9143, lr_0 = 7.9645e-04
Loss = 1.9295e-01, PNorm = 59.5793, GNorm = 1.4301, lr_0 = 7.9591e-04
Loss = 2.0846e-01, PNorm = 59.6105, GNorm = 1.4133, lr_0 = 7.9536e-04
Loss = 1.9321e-01, PNorm = 59.6493, GNorm = 0.7313, lr_0 = 7.9482e-04
Loss = 1.9919e-01, PNorm = 59.6836, GNorm = 0.5569, lr_0 = 7.9427e-04
Loss = 1.7897e-01, PNorm = 59.7048, GNorm = 1.6434, lr_0 = 7.9373e-04
Loss = 2.0261e-01, PNorm = 59.7283, GNorm = 1.2198, lr_0 = 7.9319e-04
Loss = 1.8356e-01, PNorm = 59.7550, GNorm = 0.6032, lr_0 = 7.9264e-04
Loss = 2.0051e-01, PNorm = 59.7798, GNorm = 1.0557, lr_0 = 7.9210e-04
Loss = 2.3567e-01, PNorm = 59.8088, GNorm = 0.6917, lr_0 = 7.9156e-04
Loss = 2.0583e-01, PNorm = 59.8340, GNorm = 1.4858, lr_0 = 7.9101e-04
Loss = 1.8142e-01, PNorm = 59.8692, GNorm = 0.6987, lr_0 = 7.9047e-04
Loss = 1.8358e-01, PNorm = 59.8930, GNorm = 0.7785, lr_0 = 7.8993e-04
Loss = 1.8446e-01, PNorm = 59.9152, GNorm = 2.4355, lr_0 = 7.8939e-04
Loss = 1.8788e-01, PNorm = 59.9473, GNorm = 1.0522, lr_0 = 7.8885e-04
Loss = 1.9116e-01, PNorm = 59.9796, GNorm = 0.7546, lr_0 = 7.8831e-04
Loss = 1.8360e-01, PNorm = 60.0024, GNorm = 1.3710, lr_0 = 7.8777e-04
Loss = 1.8692e-01, PNorm = 60.0262, GNorm = 0.8493, lr_0 = 7.8723e-04
Loss = 2.0035e-01, PNorm = 60.0443, GNorm = 1.0216, lr_0 = 7.8669e-04
Loss = 2.3796e-01, PNorm = 60.0776, GNorm = 0.9622, lr_0 = 7.8615e-04
Loss = 2.0153e-01, PNorm = 60.1128, GNorm = 1.1636, lr_0 = 7.8561e-04
Loss = 1.6973e-01, PNorm = 60.1413, GNorm = 0.7710, lr_0 = 7.8507e-04
Loss = 2.0911e-01, PNorm = 60.1726, GNorm = 0.7455, lr_0 = 7.8454e-04
Loss = 1.9181e-01, PNorm = 60.1995, GNorm = 0.7991, lr_0 = 7.8400e-04
Loss = 1.9620e-01, PNorm = 60.2209, GNorm = 0.7798, lr_0 = 7.8346e-04
Loss = 1.8628e-01, PNorm = 60.2426, GNorm = 2.7317, lr_0 = 7.8293e-04
Loss = 2.0091e-01, PNorm = 60.2620, GNorm = 1.6980, lr_0 = 7.8239e-04
Loss = 2.0680e-01, PNorm = 60.2987, GNorm = 2.4284, lr_0 = 7.8185e-04
Loss = 1.8380e-01, PNorm = 60.3254, GNorm = 1.1404, lr_0 = 7.8132e-04
Validation mae = 0.261919
Epoch 5
Loss = 1.5946e-01, PNorm = 60.3492, GNorm = 0.9650, lr_0 = 7.8078e-04
Loss = 1.7344e-01, PNorm = 60.3659, GNorm = 1.4165, lr_0 = 7.8025e-04
Loss = 1.6720e-01, PNorm = 60.3869, GNorm = 1.3919, lr_0 = 7.7971e-04
Loss = 1.8898e-01, PNorm = 60.4129, GNorm = 0.6925, lr_0 = 7.7918e-04
Loss = 1.7376e-01, PNorm = 60.4392, GNorm = 1.8367, lr_0 = 7.7864e-04
Loss = 1.5704e-01, PNorm = 60.4604, GNorm = 0.6673, lr_0 = 7.7811e-04
Loss = 1.5208e-01, PNorm = 60.4805, GNorm = 1.2491, lr_0 = 7.7758e-04
Loss = 1.9487e-01, PNorm = 60.4997, GNorm = 1.0662, lr_0 = 7.7705e-04
Loss = 1.7919e-01, PNorm = 60.5238, GNorm = 0.6869, lr_0 = 7.7651e-04
Loss = 1.9828e-01, PNorm = 60.5536, GNorm = 1.1551, lr_0 = 7.7598e-04
Loss = 1.6942e-01, PNorm = 60.5826, GNorm = 0.5972, lr_0 = 7.7545e-04
Loss = 1.6771e-01, PNorm = 60.6050, GNorm = 0.8441, lr_0 = 7.7492e-04
Loss = 1.8606e-01, PNorm = 60.6282, GNorm = 0.8347, lr_0 = 7.7439e-04
Loss = 1.8108e-01, PNorm = 60.6590, GNorm = 0.6393, lr_0 = 7.7386e-04
Loss = 1.5503e-01, PNorm = 60.6815, GNorm = 1.3286, lr_0 = 7.7333e-04
Loss = 1.9676e-01, PNorm = 60.7060, GNorm = 2.2716, lr_0 = 7.7280e-04
Loss = 1.6416e-01, PNorm = 60.7309, GNorm = 0.9384, lr_0 = 7.7227e-04
Loss = 1.5248e-01, PNorm = 60.7541, GNorm = 0.7438, lr_0 = 7.7174e-04
Loss = 1.6557e-01, PNorm = 60.7822, GNorm = 1.3106, lr_0 = 7.7121e-04
Loss = 1.7722e-01, PNorm = 60.8064, GNorm = 1.3063, lr_0 = 7.7068e-04
Loss = 1.9156e-01, PNorm = 60.8316, GNorm = 1.3656, lr_0 = 7.7015e-04
Loss = 1.5369e-01, PNorm = 60.8592, GNorm = 1.0665, lr_0 = 7.6963e-04
Loss = 1.9420e-01, PNorm = 60.8922, GNorm = 1.5477, lr_0 = 7.6910e-04
Loss = 1.8487e-01, PNorm = 60.9110, GNorm = 0.6684, lr_0 = 7.6857e-04
Loss = 1.6404e-01, PNorm = 60.9380, GNorm = 1.3242, lr_0 = 7.6805e-04
Loss = 1.7528e-01, PNorm = 60.9555, GNorm = 0.6307, lr_0 = 7.6752e-04
Loss = 1.9352e-01, PNorm = 60.9788, GNorm = 1.0302, lr_0 = 7.6699e-04
Loss = 1.7476e-01, PNorm = 61.0046, GNorm = 1.5246, lr_0 = 7.6647e-04
Loss = 1.8350e-01, PNorm = 61.0303, GNorm = 0.7182, lr_0 = 7.6594e-04
Loss = 1.6770e-01, PNorm = 61.0563, GNorm = 0.6597, lr_0 = 7.6542e-04
Loss = 1.7136e-01, PNorm = 61.0796, GNorm = 0.6216, lr_0 = 7.6489e-04
Loss = 1.7665e-01, PNorm = 61.1055, GNorm = 0.8753, lr_0 = 7.6437e-04
Loss = 1.6893e-01, PNorm = 61.1256, GNorm = 0.7647, lr_0 = 7.6385e-04
Loss = 1.5659e-01, PNorm = 61.1479, GNorm = 1.1875, lr_0 = 7.6332e-04
Loss = 1.8808e-01, PNorm = 61.1744, GNorm = 1.0765, lr_0 = 7.6280e-04
Loss = 1.5941e-01, PNorm = 61.2036, GNorm = 1.8648, lr_0 = 7.6228e-04
Loss = 1.5693e-01, PNorm = 61.2334, GNorm = 0.5816, lr_0 = 7.6176e-04
Loss = 1.8093e-01, PNorm = 61.2576, GNorm = 1.6068, lr_0 = 7.6123e-04
Loss = 1.6569e-01, PNorm = 61.2805, GNorm = 0.6515, lr_0 = 7.6071e-04
Loss = 1.8872e-01, PNorm = 61.3050, GNorm = 1.4279, lr_0 = 7.6019e-04
Loss = 2.1938e-01, PNorm = 61.3346, GNorm = 1.1388, lr_0 = 7.5967e-04
Loss = 1.8520e-01, PNorm = 61.3629, GNorm = 0.8063, lr_0 = 7.5915e-04
Loss = 1.6371e-01, PNorm = 61.3893, GNorm = 0.5530, lr_0 = 7.5863e-04
Loss = 1.7377e-01, PNorm = 61.4053, GNorm = 0.6662, lr_0 = 7.5811e-04
Loss = 1.8960e-01, PNorm = 61.4292, GNorm = 1.4713, lr_0 = 7.5759e-04
Loss = 1.6617e-01, PNorm = 61.4581, GNorm = 1.2110, lr_0 = 7.5707e-04
Loss = 1.7465e-01, PNorm = 61.4750, GNorm = 1.0801, lr_0 = 7.5655e-04
Loss = 1.7320e-01, PNorm = 61.4979, GNorm = 1.5783, lr_0 = 7.5603e-04
Loss = 1.4618e-01, PNorm = 61.5221, GNorm = 0.9053, lr_0 = 7.5552e-04
Loss = 1.7006e-01, PNorm = 61.5411, GNorm = 1.0587, lr_0 = 7.5500e-04
Loss = 1.7489e-01, PNorm = 61.5603, GNorm = 1.5642, lr_0 = 7.5448e-04
Loss = 1.9059e-01, PNorm = 61.5784, GNorm = 1.1853, lr_0 = 7.5397e-04
Loss = 1.7185e-01, PNorm = 61.5998, GNorm = 0.6833, lr_0 = 7.5345e-04
Loss = 1.6008e-01, PNorm = 61.6256, GNorm = 1.1020, lr_0 = 7.5293e-04
Loss = 1.7289e-01, PNorm = 61.6465, GNorm = 1.9897, lr_0 = 7.5242e-04
Loss = 1.7778e-01, PNorm = 61.6711, GNorm = 0.7807, lr_0 = 7.5190e-04
Loss = 1.8178e-01, PNorm = 61.7006, GNorm = 0.6909, lr_0 = 7.5139e-04
Loss = 1.4669e-01, PNorm = 61.7202, GNorm = 0.6521, lr_0 = 7.5087e-04
Loss = 1.7456e-01, PNorm = 61.7421, GNorm = 0.9118, lr_0 = 7.5036e-04
Loss = 1.9624e-01, PNorm = 61.7590, GNorm = 1.1552, lr_0 = 7.4984e-04
Loss = 1.7132e-01, PNorm = 61.7859, GNorm = 1.7403, lr_0 = 7.4933e-04
Loss = 1.8049e-01, PNorm = 61.8151, GNorm = 2.5382, lr_0 = 7.4882e-04
Loss = 1.9277e-01, PNorm = 61.8426, GNorm = 2.3672, lr_0 = 7.4830e-04
Loss = 1.8330e-01, PNorm = 61.8763, GNorm = 0.7358, lr_0 = 7.4779e-04
Loss = 1.6568e-01, PNorm = 61.9107, GNorm = 0.9841, lr_0 = 7.4728e-04
Loss = 1.6440e-01, PNorm = 61.9397, GNorm = 0.6790, lr_0 = 7.4677e-04
Loss = 1.8416e-01, PNorm = 61.9683, GNorm = 0.8982, lr_0 = 7.4625e-04
Loss = 1.6732e-01, PNorm = 61.9938, GNorm = 0.7477, lr_0 = 7.4574e-04
Loss = 1.7533e-01, PNorm = 62.0150, GNorm = 1.6698, lr_0 = 7.4523e-04
Loss = 1.9050e-01, PNorm = 62.0332, GNorm = 0.6111, lr_0 = 7.4472e-04
Loss = 2.0759e-01, PNorm = 62.0462, GNorm = 0.9015, lr_0 = 7.4421e-04
Loss = 2.0405e-01, PNorm = 62.0874, GNorm = 0.8749, lr_0 = 7.4370e-04
Loss = 1.7479e-01, PNorm = 62.1176, GNorm = 0.9528, lr_0 = 7.4319e-04
Loss = 1.6589e-01, PNorm = 62.1597, GNorm = 0.8897, lr_0 = 7.4268e-04
Loss = 2.0047e-01, PNorm = 62.1923, GNorm = 1.2384, lr_0 = 7.4217e-04
Loss = 1.7213e-01, PNorm = 62.2335, GNorm = 0.8622, lr_0 = 7.4167e-04
Loss = 1.6177e-01, PNorm = 62.2570, GNorm = 0.6687, lr_0 = 7.4116e-04
Loss = 1.7790e-01, PNorm = 62.2799, GNorm = 0.8394, lr_0 = 7.4065e-04
Loss = 1.5120e-01, PNorm = 62.2960, GNorm = 1.1496, lr_0 = 7.4014e-04
Loss = 1.8289e-01, PNorm = 62.3180, GNorm = 1.0499, lr_0 = 7.3964e-04
Loss = 1.5275e-01, PNorm = 62.3370, GNorm = 0.9716, lr_0 = 7.3913e-04
Loss = 1.9090e-01, PNorm = 62.3626, GNorm = 1.4814, lr_0 = 7.3862e-04
Loss = 1.8092e-01, PNorm = 62.3860, GNorm = 0.5760, lr_0 = 7.3812e-04
Loss = 2.0249e-01, PNorm = 62.4140, GNorm = 1.7490, lr_0 = 7.3761e-04
Loss = 1.6151e-01, PNorm = 62.4412, GNorm = 1.4203, lr_0 = 7.3711e-04
Loss = 1.9169e-01, PNorm = 62.4616, GNorm = 0.7794, lr_0 = 7.3660e-04
Loss = 1.7758e-01, PNorm = 62.4796, GNorm = 1.4856, lr_0 = 7.3610e-04
Loss = 1.5710e-01, PNorm = 62.5019, GNorm = 0.8359, lr_0 = 7.3559e-04
Loss = 1.6481e-01, PNorm = 62.5208, GNorm = 0.9319, lr_0 = 7.3509e-04
Loss = 1.7332e-01, PNorm = 62.5401, GNorm = 0.7326, lr_0 = 7.3458e-04
Loss = 1.8268e-01, PNorm = 62.5562, GNorm = 0.8187, lr_0 = 7.3408e-04
Loss = 1.6802e-01, PNorm = 62.5761, GNorm = 1.1862, lr_0 = 7.3358e-04
Loss = 1.7164e-01, PNorm = 62.6010, GNorm = 1.8022, lr_0 = 7.3308e-04
Loss = 1.8146e-01, PNorm = 62.6246, GNorm = 0.6961, lr_0 = 7.3257e-04
Loss = 1.8530e-01, PNorm = 62.6448, GNorm = 0.9433, lr_0 = 7.3207e-04
Loss = 1.6819e-01, PNorm = 62.6700, GNorm = 1.5204, lr_0 = 7.3157e-04
Loss = 1.7566e-01, PNorm = 62.6931, GNorm = 0.4939, lr_0 = 7.3107e-04
Loss = 1.7880e-01, PNorm = 62.7215, GNorm = 0.7919, lr_0 = 7.3057e-04
Loss = 1.7632e-01, PNorm = 62.7445, GNorm = 0.7210, lr_0 = 7.3007e-04
Loss = 1.5910e-01, PNorm = 62.7638, GNorm = 0.7728, lr_0 = 7.2957e-04
Loss = 1.6309e-01, PNorm = 62.7811, GNorm = 1.0439, lr_0 = 7.2907e-04
Loss = 1.7655e-01, PNorm = 62.7999, GNorm = 0.9757, lr_0 = 7.2857e-04
Loss = 1.7732e-01, PNorm = 62.8357, GNorm = 0.7688, lr_0 = 7.2807e-04
Loss = 1.7182e-01, PNorm = 62.8572, GNorm = 1.6047, lr_0 = 7.2757e-04
Loss = 1.6407e-01, PNorm = 62.8731, GNorm = 1.8238, lr_0 = 7.2707e-04
Loss = 1.8172e-01, PNorm = 62.8985, GNorm = 1.4446, lr_0 = 7.2657e-04
Loss = 1.8289e-01, PNorm = 62.9183, GNorm = 1.3734, lr_0 = 7.2608e-04
Loss = 1.7863e-01, PNorm = 62.9457, GNorm = 0.6637, lr_0 = 7.2558e-04
Loss = 1.7024e-01, PNorm = 62.9633, GNorm = 1.7446, lr_0 = 7.2508e-04
Loss = 1.7698e-01, PNorm = 62.9820, GNorm = 1.0194, lr_0 = 7.2458e-04
Loss = 1.9057e-01, PNorm = 62.9993, GNorm = 1.1390, lr_0 = 7.2409e-04
Loss = 1.8129e-01, PNorm = 63.0274, GNorm = 1.9145, lr_0 = 7.2359e-04
Loss = 1.6555e-01, PNorm = 63.0594, GNorm = 0.9282, lr_0 = 7.2310e-04
Loss = 1.9467e-01, PNorm = 63.0889, GNorm = 1.1399, lr_0 = 7.2260e-04
Loss = 1.6574e-01, PNorm = 63.1142, GNorm = 0.6680, lr_0 = 7.2211e-04
Loss = 1.5397e-01, PNorm = 63.1255, GNorm = 1.0196, lr_0 = 7.2161e-04
Loss = 1.6448e-01, PNorm = 63.1371, GNorm = 0.6514, lr_0 = 7.2112e-04
Loss = 1.9598e-01, PNorm = 63.1565, GNorm = 0.5742, lr_0 = 7.2062e-04
Loss = 1.7436e-01, PNorm = 63.1808, GNorm = 0.7177, lr_0 = 7.2013e-04
Loss = 1.7645e-01, PNorm = 63.2126, GNorm = 1.4411, lr_0 = 7.1964e-04
Validation mae = 0.251117
Epoch 6
Loss = 1.4047e-01, PNorm = 63.2348, GNorm = 0.5253, lr_0 = 7.1914e-04
Loss = 1.5414e-01, PNorm = 63.2581, GNorm = 1.0834, lr_0 = 7.1865e-04
Loss = 1.5805e-01, PNorm = 63.2796, GNorm = 0.8144, lr_0 = 7.1816e-04
Loss = 1.6246e-01, PNorm = 63.2983, GNorm = 0.7591, lr_0 = 7.1767e-04
Loss = 1.7613e-01, PNorm = 63.3233, GNorm = 1.0623, lr_0 = 7.1717e-04
Loss = 1.7021e-01, PNorm = 63.3525, GNorm = 0.6502, lr_0 = 7.1668e-04
Loss = 1.7129e-01, PNorm = 63.3783, GNorm = 1.2558, lr_0 = 7.1619e-04
Loss = 1.7003e-01, PNorm = 63.4124, GNorm = 0.6143, lr_0 = 7.1570e-04
Loss = 1.7248e-01, PNorm = 63.4406, GNorm = 0.4998, lr_0 = 7.1521e-04
Loss = 1.8062e-01, PNorm = 63.4681, GNorm = 2.1008, lr_0 = 7.1472e-04
Loss = 2.1213e-01, PNorm = 63.5069, GNorm = 1.3163, lr_0 = 7.1423e-04
Loss = 1.7037e-01, PNorm = 63.5361, GNorm = 0.7748, lr_0 = 7.1374e-04
Loss = 1.6809e-01, PNorm = 63.5630, GNorm = 0.7310, lr_0 = 7.1325e-04
Loss = 1.5429e-01, PNorm = 63.5876, GNorm = 0.8506, lr_0 = 7.1277e-04
Loss = 1.6605e-01, PNorm = 63.6046, GNorm = 0.6104, lr_0 = 7.1228e-04
Loss = 1.3696e-01, PNorm = 63.6234, GNorm = 0.8911, lr_0 = 7.1179e-04
Loss = 1.5009e-01, PNorm = 63.6458, GNorm = 0.6775, lr_0 = 7.1130e-04
Loss = 1.8184e-01, PNorm = 63.6674, GNorm = 0.7311, lr_0 = 7.1081e-04
Loss = 1.7270e-01, PNorm = 63.6805, GNorm = 1.0173, lr_0 = 7.1033e-04
Loss = 1.7714e-01, PNorm = 63.6912, GNorm = 0.9678, lr_0 = 7.0984e-04
Loss = 1.7393e-01, PNorm = 63.7202, GNorm = 0.9497, lr_0 = 7.0935e-04
Loss = 1.6658e-01, PNorm = 63.7503, GNorm = 0.6973, lr_0 = 7.0887e-04
Loss = 1.4218e-01, PNorm = 63.7829, GNorm = 0.7371, lr_0 = 7.0838e-04
Loss = 1.5879e-01, PNorm = 63.7991, GNorm = 0.6425, lr_0 = 7.0790e-04
Loss = 1.5366e-01, PNorm = 63.8232, GNorm = 0.8740, lr_0 = 7.0741e-04
Loss = 1.6510e-01, PNorm = 63.8379, GNorm = 0.8293, lr_0 = 7.0693e-04
Loss = 1.7561e-01, PNorm = 63.8621, GNorm = 0.6558, lr_0 = 7.0644e-04
Loss = 1.7669e-01, PNorm = 63.8821, GNorm = 0.8455, lr_0 = 7.0596e-04
Loss = 1.5919e-01, PNorm = 63.9052, GNorm = 0.5991, lr_0 = 7.0548e-04
Loss = 1.5723e-01, PNorm = 63.9210, GNorm = 0.9969, lr_0 = 7.0499e-04
Loss = 1.5783e-01, PNorm = 63.9510, GNorm = 0.6553, lr_0 = 7.0451e-04
Loss = 1.7884e-01, PNorm = 63.9794, GNorm = 0.5771, lr_0 = 7.0403e-04
Loss = 1.6527e-01, PNorm = 64.0063, GNorm = 0.9192, lr_0 = 7.0354e-04
Loss = 2.0158e-01, PNorm = 64.0361, GNorm = 0.8038, lr_0 = 7.0306e-04
Loss = 1.9370e-01, PNorm = 64.0675, GNorm = 0.8748, lr_0 = 7.0258e-04
Loss = 1.8688e-01, PNorm = 64.0922, GNorm = 2.0743, lr_0 = 7.0210e-04
Loss = 1.6644e-01, PNorm = 64.1294, GNorm = 1.1345, lr_0 = 7.0162e-04
Loss = 1.6718e-01, PNorm = 64.1592, GNorm = 0.9823, lr_0 = 7.0114e-04
Loss = 1.6829e-01, PNorm = 64.1761, GNorm = 0.9231, lr_0 = 7.0066e-04
Loss = 1.5217e-01, PNorm = 64.1971, GNorm = 0.6827, lr_0 = 7.0018e-04
Loss = 1.7179e-01, PNorm = 64.2194, GNorm = 0.6502, lr_0 = 6.9970e-04
Loss = 1.3949e-01, PNorm = 64.2356, GNorm = 0.9214, lr_0 = 6.9922e-04
Loss = 1.9954e-01, PNorm = 64.2544, GNorm = 0.8636, lr_0 = 6.9874e-04
Loss = 1.6058e-01, PNorm = 64.2766, GNorm = 2.0127, lr_0 = 6.9826e-04
Loss = 1.6527e-01, PNorm = 64.3019, GNorm = 0.8555, lr_0 = 6.9778e-04
Loss = 1.8711e-01, PNorm = 64.3261, GNorm = 1.3009, lr_0 = 6.9730e-04
Loss = 1.8667e-01, PNorm = 64.3639, GNorm = 1.1813, lr_0 = 6.9683e-04
Loss = 1.7945e-01, PNorm = 64.3854, GNorm = 1.2277, lr_0 = 6.9635e-04
Loss = 1.5380e-01, PNorm = 64.4050, GNorm = 0.9012, lr_0 = 6.9587e-04
Loss = 1.6493e-01, PNorm = 64.4204, GNorm = 0.8020, lr_0 = 6.9540e-04
Loss = 1.6020e-01, PNorm = 64.4402, GNorm = 1.6828, lr_0 = 6.9492e-04
Loss = 1.3253e-01, PNorm = 64.4624, GNorm = 0.6843, lr_0 = 6.9444e-04
Loss = 1.6684e-01, PNorm = 64.4859, GNorm = 0.9186, lr_0 = 6.9397e-04
Loss = 1.7483e-01, PNorm = 64.5146, GNorm = 1.2133, lr_0 = 6.9349e-04
Loss = 1.6345e-01, PNorm = 64.5358, GNorm = 1.0439, lr_0 = 6.9302e-04
Loss = 1.6806e-01, PNorm = 64.5566, GNorm = 1.2459, lr_0 = 6.9254e-04
Loss = 1.6025e-01, PNorm = 64.5700, GNorm = 0.8385, lr_0 = 6.9207e-04
Loss = 1.6583e-01, PNorm = 64.5854, GNorm = 0.5677, lr_0 = 6.9159e-04
Loss = 1.5312e-01, PNorm = 64.6048, GNorm = 0.9190, lr_0 = 6.9112e-04
Loss = 1.4920e-01, PNorm = 64.6225, GNorm = 0.5826, lr_0 = 6.9065e-04
Loss = 1.6615e-01, PNorm = 64.6368, GNorm = 0.8544, lr_0 = 6.9017e-04
Loss = 1.8475e-01, PNorm = 64.6579, GNorm = 0.7828, lr_0 = 6.8970e-04
Loss = 1.8813e-01, PNorm = 64.6798, GNorm = 0.9689, lr_0 = 6.8923e-04
Loss = 1.7818e-01, PNorm = 64.6967, GNorm = 0.6883, lr_0 = 6.8876e-04
Loss = 1.6665e-01, PNorm = 64.7303, GNorm = 0.9592, lr_0 = 6.8828e-04
Loss = 1.8903e-01, PNorm = 64.7465, GNorm = 0.7709, lr_0 = 6.8781e-04
Loss = 1.6791e-01, PNorm = 64.7713, GNorm = 1.0186, lr_0 = 6.8734e-04
Loss = 1.6953e-01, PNorm = 64.8015, GNorm = 0.8230, lr_0 = 6.8687e-04
Loss = 1.8129e-01, PNorm = 64.8327, GNorm = 0.5649, lr_0 = 6.8640e-04
Loss = 1.5181e-01, PNorm = 64.8571, GNorm = 1.6159, lr_0 = 6.8593e-04
Loss = 1.7659e-01, PNorm = 64.8800, GNorm = 0.8528, lr_0 = 6.8546e-04
Loss = 1.6194e-01, PNorm = 64.9057, GNorm = 0.9424, lr_0 = 6.8499e-04
Loss = 1.6284e-01, PNorm = 64.9242, GNorm = 0.8901, lr_0 = 6.8452e-04
Loss = 1.7661e-01, PNorm = 64.9479, GNorm = 1.5001, lr_0 = 6.8405e-04
Loss = 1.8173e-01, PNorm = 64.9631, GNorm = 0.8621, lr_0 = 6.8358e-04
Loss = 1.5218e-01, PNorm = 64.9785, GNorm = 0.9811, lr_0 = 6.8312e-04
Loss = 1.7589e-01, PNorm = 65.0041, GNorm = 0.9783, lr_0 = 6.8265e-04
Loss = 1.6723e-01, PNorm = 65.0195, GNorm = 0.9874, lr_0 = 6.8218e-04
Loss = 1.5352e-01, PNorm = 65.0387, GNorm = 1.7801, lr_0 = 6.8171e-04
Loss = 1.7535e-01, PNorm = 65.0593, GNorm = 0.7534, lr_0 = 6.8125e-04
Loss = 1.5095e-01, PNorm = 65.0840, GNorm = 1.0720, lr_0 = 6.8078e-04
Loss = 1.6283e-01, PNorm = 65.0986, GNorm = 1.1227, lr_0 = 6.8031e-04
Loss = 1.6013e-01, PNorm = 65.1341, GNorm = 0.9615, lr_0 = 6.7985e-04
Loss = 1.7941e-01, PNorm = 65.1562, GNorm = 0.7186, lr_0 = 6.7938e-04
Loss = 1.5501e-01, PNorm = 65.1800, GNorm = 0.8528, lr_0 = 6.7892e-04
Loss = 1.7686e-01, PNorm = 65.2043, GNorm = 0.7730, lr_0 = 6.7845e-04
Loss = 1.6247e-01, PNorm = 65.2341, GNorm = 1.3398, lr_0 = 6.7799e-04
Loss = 1.6559e-01, PNorm = 65.2562, GNorm = 0.9658, lr_0 = 6.7752e-04
Loss = 1.9100e-01, PNorm = 65.2885, GNorm = 0.9678, lr_0 = 6.7706e-04
Loss = 1.8760e-01, PNorm = 65.3142, GNorm = 1.0245, lr_0 = 6.7659e-04
Loss = 1.5243e-01, PNorm = 65.3429, GNorm = 0.9433, lr_0 = 6.7613e-04
Loss = 1.6870e-01, PNorm = 65.3599, GNorm = 0.5556, lr_0 = 6.7567e-04
Loss = 1.7046e-01, PNorm = 65.3890, GNorm = 0.9578, lr_0 = 6.7520e-04
Loss = 1.6130e-01, PNorm = 65.4115, GNorm = 1.3816, lr_0 = 6.7474e-04
Loss = 1.7018e-01, PNorm = 65.4339, GNorm = 0.6370, lr_0 = 6.7428e-04
Loss = 1.3446e-01, PNorm = 65.4492, GNorm = 0.7151, lr_0 = 6.7382e-04
Loss = 1.5137e-01, PNorm = 65.4690, GNorm = 1.3320, lr_0 = 6.7335e-04
Loss = 1.8386e-01, PNorm = 65.4895, GNorm = 1.7718, lr_0 = 6.7289e-04
Loss = 1.3048e-01, PNorm = 65.5062, GNorm = 0.5928, lr_0 = 6.7243e-04
Loss = 1.5653e-01, PNorm = 65.5254, GNorm = 0.9541, lr_0 = 6.7197e-04
Loss = 1.5455e-01, PNorm = 65.5496, GNorm = 0.7635, lr_0 = 6.7151e-04
Loss = 1.5381e-01, PNorm = 65.5704, GNorm = 2.0690, lr_0 = 6.7105e-04
Loss = 1.7257e-01, PNorm = 65.5932, GNorm = 1.3150, lr_0 = 6.7059e-04
Loss = 1.8045e-01, PNorm = 65.6206, GNorm = 0.6902, lr_0 = 6.7013e-04
Loss = 1.7958e-01, PNorm = 65.6336, GNorm = 1.5471, lr_0 = 6.6967e-04
Loss = 1.5399e-01, PNorm = 65.6557, GNorm = 0.6569, lr_0 = 6.6921e-04
Loss = 1.4606e-01, PNorm = 65.6773, GNorm = 0.8190, lr_0 = 6.6876e-04
Loss = 1.5635e-01, PNorm = 65.6939, GNorm = 0.6427, lr_0 = 6.6830e-04
Loss = 1.7771e-01, PNorm = 65.7112, GNorm = 0.5051, lr_0 = 6.6784e-04
Loss = 1.8702e-01, PNorm = 65.7383, GNorm = 0.8281, lr_0 = 6.6738e-04
Loss = 1.7331e-01, PNorm = 65.7610, GNorm = 0.6146, lr_0 = 6.6693e-04
Loss = 1.5206e-01, PNorm = 65.7836, GNorm = 1.1266, lr_0 = 6.6647e-04
Loss = 1.7023e-01, PNorm = 65.8004, GNorm = 0.9426, lr_0 = 6.6601e-04
Loss = 1.9108e-01, PNorm = 65.8198, GNorm = 2.1605, lr_0 = 6.6556e-04
Loss = 1.8137e-01, PNorm = 65.8456, GNorm = 2.0993, lr_0 = 6.6510e-04
Loss = 1.9049e-01, PNorm = 65.8797, GNorm = 2.3276, lr_0 = 6.6464e-04
Loss = 1.6091e-01, PNorm = 65.9042, GNorm = 0.7952, lr_0 = 6.6419e-04
Loss = 1.4923e-01, PNorm = 65.9234, GNorm = 0.7666, lr_0 = 6.6373e-04
Loss = 1.5756e-01, PNorm = 65.9436, GNorm = 1.3733, lr_0 = 6.6328e-04
Loss = 1.7077e-01, PNorm = 65.9623, GNorm = 0.9958, lr_0 = 6.6282e-04
Validation mae = 0.255454
Epoch 7
Loss = 1.4938e-01, PNorm = 65.9940, GNorm = 0.9944, lr_0 = 6.6237e-04
Loss = 1.9419e-01, PNorm = 66.0022, GNorm = 0.8366, lr_0 = 6.6192e-04
Loss = 1.6743e-01, PNorm = 66.0278, GNorm = 1.1293, lr_0 = 6.6146e-04
Loss = 1.7221e-01, PNorm = 66.0534, GNorm = 1.0092, lr_0 = 6.6101e-04
Loss = 1.7481e-01, PNorm = 66.0852, GNorm = 0.7378, lr_0 = 6.6056e-04
Loss = 1.7163e-01, PNorm = 66.1171, GNorm = 0.6030, lr_0 = 6.6011e-04
Loss = 1.6074e-01, PNorm = 66.1490, GNorm = 1.6411, lr_0 = 6.5965e-04
Loss = 1.4524e-01, PNorm = 66.1731, GNorm = 0.5560, lr_0 = 6.5920e-04
Loss = 1.7047e-01, PNorm = 66.1949, GNorm = 0.6891, lr_0 = 6.5875e-04
Loss = 1.6291e-01, PNorm = 66.2108, GNorm = 0.9738, lr_0 = 6.5830e-04
Loss = 1.8684e-01, PNorm = 66.2329, GNorm = 0.5100, lr_0 = 6.5785e-04
Loss = 1.3572e-01, PNorm = 66.2466, GNorm = 0.6324, lr_0 = 6.5740e-04
Loss = 1.5399e-01, PNorm = 66.2654, GNorm = 0.9069, lr_0 = 6.5695e-04
Loss = 1.3856e-01, PNorm = 66.2818, GNorm = 0.5986, lr_0 = 6.5650e-04
Loss = 1.5252e-01, PNorm = 66.3014, GNorm = 1.1868, lr_0 = 6.5605e-04
Loss = 1.7088e-01, PNorm = 66.3227, GNorm = 0.7518, lr_0 = 6.5560e-04
Loss = 1.5417e-01, PNorm = 66.3399, GNorm = 1.0615, lr_0 = 6.5515e-04
Loss = 1.5087e-01, PNorm = 66.3566, GNorm = 1.1419, lr_0 = 6.5470e-04
Loss = 1.7150e-01, PNorm = 66.3785, GNorm = 1.4998, lr_0 = 6.5425e-04
Loss = 1.7543e-01, PNorm = 66.4033, GNorm = 0.7143, lr_0 = 6.5380e-04
Loss = 1.7001e-01, PNorm = 66.4313, GNorm = 0.6507, lr_0 = 6.5335e-04
Loss = 1.2858e-01, PNorm = 66.4522, GNorm = 0.5868, lr_0 = 6.5291e-04
Loss = 1.5161e-01, PNorm = 66.4682, GNorm = 0.9618, lr_0 = 6.5246e-04
Loss = 1.4843e-01, PNorm = 66.4825, GNorm = 0.8788, lr_0 = 6.5201e-04
Loss = 1.5275e-01, PNorm = 66.5058, GNorm = 0.6971, lr_0 = 6.5157e-04
Loss = 1.6176e-01, PNorm = 66.5240, GNorm = 0.5961, lr_0 = 6.5112e-04
Loss = 1.4632e-01, PNorm = 66.5433, GNorm = 0.9035, lr_0 = 6.5067e-04
Loss = 1.5852e-01, PNorm = 66.5636, GNorm = 0.8901, lr_0 = 6.5023e-04
Loss = 1.5104e-01, PNorm = 66.5870, GNorm = 1.6987, lr_0 = 6.4978e-04
Loss = 1.6588e-01, PNorm = 66.6184, GNorm = 1.2919, lr_0 = 6.4934e-04
Loss = 1.5676e-01, PNorm = 66.6417, GNorm = 0.5923, lr_0 = 6.4889e-04
Loss = 1.6512e-01, PNorm = 66.6577, GNorm = 0.7638, lr_0 = 6.4845e-04
Loss = 1.5447e-01, PNorm = 66.6757, GNorm = 0.7783, lr_0 = 6.4800e-04
Loss = 1.4899e-01, PNorm = 66.6973, GNorm = 0.7875, lr_0 = 6.4756e-04
Loss = 1.5085e-01, PNorm = 66.7236, GNorm = 1.0473, lr_0 = 6.4712e-04
Loss = 1.6071e-01, PNorm = 66.7496, GNorm = 0.8174, lr_0 = 6.4667e-04
Loss = 1.5283e-01, PNorm = 66.7730, GNorm = 1.1583, lr_0 = 6.4623e-04
Loss = 1.5914e-01, PNorm = 66.7981, GNorm = 0.7466, lr_0 = 6.4579e-04
Loss = 1.5456e-01, PNorm = 66.8247, GNorm = 0.7016, lr_0 = 6.4534e-04
Loss = 1.6256e-01, PNorm = 66.8455, GNorm = 0.7537, lr_0 = 6.4490e-04
Loss = 1.7274e-01, PNorm = 66.8602, GNorm = 1.8435, lr_0 = 6.4446e-04
Loss = 1.8818e-01, PNorm = 66.8734, GNorm = 1.2606, lr_0 = 6.4402e-04
Loss = 1.5211e-01, PNorm = 66.8951, GNorm = 1.4722, lr_0 = 6.4358e-04
Loss = 1.8758e-01, PNorm = 66.9147, GNorm = 0.8939, lr_0 = 6.4314e-04
Loss = 1.6089e-01, PNorm = 66.9311, GNorm = 0.8841, lr_0 = 6.4270e-04
Loss = 1.5038e-01, PNorm = 66.9539, GNorm = 0.8447, lr_0 = 6.4226e-04
Loss = 1.6349e-01, PNorm = 66.9705, GNorm = 1.0196, lr_0 = 6.4182e-04
Loss = 1.5369e-01, PNorm = 66.9940, GNorm = 1.1943, lr_0 = 6.4138e-04
Loss = 1.5020e-01, PNorm = 67.0154, GNorm = 0.6310, lr_0 = 6.4094e-04
Loss = 1.5173e-01, PNorm = 67.0264, GNorm = 0.6602, lr_0 = 6.4050e-04
Loss = 1.4816e-01, PNorm = 67.0452, GNorm = 0.8107, lr_0 = 6.4006e-04
Loss = 1.4397e-01, PNorm = 67.0682, GNorm = 0.6939, lr_0 = 6.3962e-04
Loss = 1.5577e-01, PNorm = 67.0918, GNorm = 1.4230, lr_0 = 6.3918e-04
Loss = 1.4867e-01, PNorm = 67.1011, GNorm = 0.6049, lr_0 = 6.3874e-04
Loss = 1.5823e-01, PNorm = 67.1157, GNorm = 0.7752, lr_0 = 6.3831e-04
Loss = 1.5350e-01, PNorm = 67.1344, GNorm = 0.6732, lr_0 = 6.3787e-04
Loss = 1.5381e-01, PNorm = 67.1531, GNorm = 0.7511, lr_0 = 6.3743e-04
Loss = 1.3627e-01, PNorm = 67.1703, GNorm = 1.0894, lr_0 = 6.3700e-04
Loss = 1.5417e-01, PNorm = 67.1860, GNorm = 0.9218, lr_0 = 6.3656e-04
Loss = 1.4674e-01, PNorm = 67.2032, GNorm = 0.6515, lr_0 = 6.3612e-04
Loss = 1.6401e-01, PNorm = 67.2196, GNorm = 0.7769, lr_0 = 6.3569e-04
Loss = 1.4666e-01, PNorm = 67.2445, GNorm = 0.5862, lr_0 = 6.3525e-04
Loss = 1.4020e-01, PNorm = 67.2590, GNorm = 0.5791, lr_0 = 6.3482e-04
Loss = 1.6547e-01, PNorm = 67.2808, GNorm = 0.7901, lr_0 = 6.3438e-04
Loss = 1.7143e-01, PNorm = 67.2982, GNorm = 0.6448, lr_0 = 6.3395e-04
Loss = 1.3723e-01, PNorm = 67.3216, GNorm = 0.7827, lr_0 = 6.3351e-04
Loss = 1.5887e-01, PNorm = 67.3404, GNorm = 0.7603, lr_0 = 6.3308e-04
Loss = 1.4846e-01, PNorm = 67.3618, GNorm = 1.4565, lr_0 = 6.3265e-04
Loss = 1.4754e-01, PNorm = 67.3761, GNorm = 1.0791, lr_0 = 6.3221e-04
Loss = 1.5535e-01, PNorm = 67.3980, GNorm = 1.0833, lr_0 = 6.3178e-04
Loss = 1.5007e-01, PNorm = 67.4222, GNorm = 1.2724, lr_0 = 6.3135e-04
Loss = 1.6519e-01, PNorm = 67.4482, GNorm = 0.9427, lr_0 = 6.3091e-04
Loss = 1.6480e-01, PNorm = 67.4764, GNorm = 1.3106, lr_0 = 6.3048e-04
Loss = 1.4898e-01, PNorm = 67.4943, GNorm = 0.5397, lr_0 = 6.3005e-04
Loss = 1.4905e-01, PNorm = 67.5073, GNorm = 1.3429, lr_0 = 6.2962e-04
Loss = 1.7706e-01, PNorm = 67.5138, GNorm = 0.9250, lr_0 = 6.2919e-04
Loss = 1.4157e-01, PNorm = 67.5340, GNorm = 1.2993, lr_0 = 6.2876e-04
Loss = 1.6117e-01, PNorm = 67.5508, GNorm = 0.7768, lr_0 = 6.2833e-04
Loss = 1.6680e-01, PNorm = 67.5746, GNorm = 0.8566, lr_0 = 6.2789e-04
Loss = 1.4923e-01, PNorm = 67.5913, GNorm = 0.6078, lr_0 = 6.2746e-04
Loss = 1.7288e-01, PNorm = 67.6088, GNorm = 1.1924, lr_0 = 6.2703e-04
Loss = 1.6064e-01, PNorm = 67.6317, GNorm = 1.4190, lr_0 = 6.2661e-04
Loss = 1.6331e-01, PNorm = 67.6567, GNorm = 0.9059, lr_0 = 6.2618e-04
Loss = 1.6666e-01, PNorm = 67.6808, GNorm = 0.7127, lr_0 = 6.2575e-04
Loss = 1.5150e-01, PNorm = 67.7023, GNorm = 0.7302, lr_0 = 6.2532e-04
Loss = 1.5200e-01, PNorm = 67.7181, GNorm = 1.5525, lr_0 = 6.2489e-04
Loss = 1.4012e-01, PNorm = 67.7426, GNorm = 0.5288, lr_0 = 6.2446e-04
Loss = 1.6097e-01, PNorm = 67.7610, GNorm = 0.7728, lr_0 = 6.2403e-04
Loss = 1.4350e-01, PNorm = 67.7822, GNorm = 0.9326, lr_0 = 6.2361e-04
Loss = 1.3583e-01, PNorm = 67.7938, GNorm = 0.6996, lr_0 = 6.2318e-04
Loss = 1.5168e-01, PNorm = 67.8143, GNorm = 1.0434, lr_0 = 6.2275e-04
Loss = 1.4995e-01, PNorm = 67.8287, GNorm = 0.5623, lr_0 = 6.2233e-04
Loss = 1.7201e-01, PNorm = 67.8464, GNorm = 1.0096, lr_0 = 6.2190e-04
Loss = 1.5624e-01, PNorm = 67.8624, GNorm = 0.5141, lr_0 = 6.2147e-04
Loss = 1.5117e-01, PNorm = 67.8811, GNorm = 0.6548, lr_0 = 6.2105e-04
Loss = 1.7236e-01, PNorm = 67.9022, GNorm = 1.0514, lr_0 = 6.2062e-04
Loss = 1.6008e-01, PNorm = 67.9291, GNorm = 0.6915, lr_0 = 6.2020e-04
Loss = 1.4191e-01, PNorm = 67.9525, GNorm = 0.9673, lr_0 = 6.1977e-04
Loss = 1.6281e-01, PNorm = 67.9755, GNorm = 0.6980, lr_0 = 6.1935e-04
Loss = 1.7677e-01, PNorm = 67.9949, GNorm = 0.9427, lr_0 = 6.1892e-04
Loss = 1.5130e-01, PNorm = 68.0112, GNorm = 1.1633, lr_0 = 6.1850e-04
Loss = 1.6073e-01, PNorm = 68.0237, GNorm = 0.9836, lr_0 = 6.1808e-04
Loss = 1.4693e-01, PNorm = 68.0458, GNorm = 1.0596, lr_0 = 6.1765e-04
Loss = 1.7427e-01, PNorm = 68.0650, GNorm = 0.6305, lr_0 = 6.1723e-04
Loss = 1.6431e-01, PNorm = 68.0873, GNorm = 0.8867, lr_0 = 6.1681e-04
Loss = 1.3834e-01, PNorm = 68.1098, GNorm = 0.5515, lr_0 = 6.1638e-04
Loss = 1.6270e-01, PNorm = 68.1250, GNorm = 0.7566, lr_0 = 6.1596e-04
Loss = 1.4774e-01, PNorm = 68.1428, GNorm = 2.3273, lr_0 = 6.1554e-04
Loss = 1.6932e-01, PNorm = 68.1652, GNorm = 0.8114, lr_0 = 6.1512e-04
Loss = 1.6749e-01, PNorm = 68.1908, GNorm = 1.0511, lr_0 = 6.1470e-04
Loss = 1.4090e-01, PNorm = 68.2104, GNorm = 0.7228, lr_0 = 6.1428e-04
Loss = 1.6443e-01, PNorm = 68.2281, GNorm = 0.7561, lr_0 = 6.1385e-04
Loss = 1.5240e-01, PNorm = 68.2414, GNorm = 0.6766, lr_0 = 6.1343e-04
Loss = 1.5181e-01, PNorm = 68.2659, GNorm = 0.8361, lr_0 = 6.1301e-04
Loss = 1.5800e-01, PNorm = 68.2846, GNorm = 1.4579, lr_0 = 6.1259e-04
Loss = 1.6607e-01, PNorm = 68.3064, GNorm = 1.3275, lr_0 = 6.1217e-04
Loss = 1.7450e-01, PNorm = 68.3207, GNorm = 0.8675, lr_0 = 6.1175e-04
Loss = 1.4602e-01, PNorm = 68.3442, GNorm = 1.3361, lr_0 = 6.1134e-04
Loss = 1.4203e-01, PNorm = 68.3655, GNorm = 0.6938, lr_0 = 6.1092e-04
Loss = 1.5653e-01, PNorm = 68.3875, GNorm = 0.5312, lr_0 = 6.1050e-04
Validation mae = 0.244791
Epoch 8
Loss = 1.4654e-01, PNorm = 68.4035, GNorm = 0.7401, lr_0 = 6.1008e-04
Loss = 1.3989e-01, PNorm = 68.4234, GNorm = 1.2094, lr_0 = 6.0966e-04
Loss = 1.4820e-01, PNorm = 68.4413, GNorm = 0.7012, lr_0 = 6.0924e-04
Loss = 1.4003e-01, PNorm = 68.4603, GNorm = 0.7491, lr_0 = 6.0883e-04
Loss = 1.4280e-01, PNorm = 68.4765, GNorm = 0.7568, lr_0 = 6.0841e-04
Loss = 1.3884e-01, PNorm = 68.4989, GNorm = 0.8080, lr_0 = 6.0799e-04
Loss = 1.3440e-01, PNorm = 68.5216, GNorm = 1.0528, lr_0 = 6.0758e-04
Loss = 1.4199e-01, PNorm = 68.5382, GNorm = 1.4592, lr_0 = 6.0716e-04
Loss = 1.4568e-01, PNorm = 68.5570, GNorm = 0.6314, lr_0 = 6.0674e-04
Loss = 1.5707e-01, PNorm = 68.5761, GNorm = 0.9852, lr_0 = 6.0633e-04
Loss = 1.5444e-01, PNorm = 68.6004, GNorm = 0.7157, lr_0 = 6.0591e-04
Loss = 1.3999e-01, PNorm = 68.6211, GNorm = 0.7283, lr_0 = 6.0550e-04
Loss = 1.3416e-01, PNorm = 68.6381, GNorm = 0.9074, lr_0 = 6.0508e-04
Loss = 1.4570e-01, PNorm = 68.6537, GNorm = 0.8928, lr_0 = 6.0467e-04
Loss = 1.5821e-01, PNorm = 68.6676, GNorm = 0.6869, lr_0 = 6.0425e-04
Loss = 1.5149e-01, PNorm = 68.6849, GNorm = 1.2089, lr_0 = 6.0384e-04
Loss = 1.5078e-01, PNorm = 68.7084, GNorm = 1.6106, lr_0 = 6.0343e-04
Loss = 1.5260e-01, PNorm = 68.7259, GNorm = 0.4901, lr_0 = 6.0301e-04
Loss = 1.3220e-01, PNorm = 68.7453, GNorm = 0.7206, lr_0 = 6.0260e-04
Loss = 1.3740e-01, PNorm = 68.7624, GNorm = 0.6659, lr_0 = 6.0219e-04
Loss = 1.3977e-01, PNorm = 68.7776, GNorm = 0.8188, lr_0 = 6.0178e-04
Loss = 1.3262e-01, PNorm = 68.7995, GNorm = 0.9315, lr_0 = 6.0136e-04
Loss = 1.4572e-01, PNorm = 68.8172, GNorm = 0.6246, lr_0 = 6.0095e-04
Loss = 1.3624e-01, PNorm = 68.8372, GNorm = 0.7022, lr_0 = 6.0054e-04
Loss = 1.5937e-01, PNorm = 68.8540, GNorm = 0.9035, lr_0 = 6.0013e-04
Loss = 1.5281e-01, PNorm = 68.8722, GNorm = 0.5061, lr_0 = 5.9972e-04
Loss = 1.6237e-01, PNorm = 68.8961, GNorm = 0.6551, lr_0 = 5.9931e-04
Loss = 1.5328e-01, PNorm = 68.9145, GNorm = 1.5691, lr_0 = 5.9890e-04
Loss = 1.6209e-01, PNorm = 68.9309, GNorm = 0.6983, lr_0 = 5.9849e-04
Loss = 1.4239e-01, PNorm = 68.9428, GNorm = 0.6586, lr_0 = 5.9808e-04
Loss = 1.4694e-01, PNorm = 68.9635, GNorm = 1.4029, lr_0 = 5.9767e-04
Loss = 1.3774e-01, PNorm = 68.9817, GNorm = 1.0987, lr_0 = 5.9726e-04
Loss = 1.6417e-01, PNorm = 69.0033, GNorm = 0.9171, lr_0 = 5.9685e-04
Loss = 1.5224e-01, PNorm = 69.0227, GNorm = 0.7030, lr_0 = 5.9644e-04
Loss = 1.5302e-01, PNorm = 69.0478, GNorm = 0.9089, lr_0 = 5.9603e-04
Loss = 1.3027e-01, PNorm = 69.0583, GNorm = 0.6893, lr_0 = 5.9562e-04
Loss = 1.3873e-01, PNorm = 69.0789, GNorm = 0.8557, lr_0 = 5.9521e-04
Loss = 1.4617e-01, PNorm = 69.0935, GNorm = 0.5585, lr_0 = 5.9481e-04
Loss = 1.6692e-01, PNorm = 69.1197, GNorm = 0.6147, lr_0 = 5.9440e-04
Loss = 1.3587e-01, PNorm = 69.1383, GNorm = 0.5173, lr_0 = 5.9399e-04
Loss = 1.3673e-01, PNorm = 69.1646, GNorm = 0.8266, lr_0 = 5.9358e-04
Loss = 1.3935e-01, PNorm = 69.1849, GNorm = 0.8504, lr_0 = 5.9318e-04
Loss = 1.4482e-01, PNorm = 69.2016, GNorm = 0.7022, lr_0 = 5.9277e-04
Loss = 1.5251e-01, PNorm = 69.2236, GNorm = 0.5769, lr_0 = 5.9236e-04
Loss = 1.3895e-01, PNorm = 69.2411, GNorm = 0.9327, lr_0 = 5.9196e-04
Loss = 1.3940e-01, PNorm = 69.2535, GNorm = 0.8331, lr_0 = 5.9155e-04
Loss = 1.5719e-01, PNorm = 69.2698, GNorm = 1.1574, lr_0 = 5.9115e-04
Loss = 1.4517e-01, PNorm = 69.2837, GNorm = 0.6994, lr_0 = 5.9074e-04
Loss = 1.4350e-01, PNorm = 69.3113, GNorm = 1.0766, lr_0 = 5.9034e-04
Loss = 1.6109e-01, PNorm = 69.3338, GNorm = 1.5550, lr_0 = 5.8993e-04
Loss = 1.4352e-01, PNorm = 69.3546, GNorm = 0.7406, lr_0 = 5.8953e-04
Loss = 1.5670e-01, PNorm = 69.3709, GNorm = 0.6297, lr_0 = 5.8913e-04
Loss = 1.5194e-01, PNorm = 69.3911, GNorm = 0.5325, lr_0 = 5.8872e-04
Loss = 1.5019e-01, PNorm = 69.4099, GNorm = 0.5909, lr_0 = 5.8832e-04
Loss = 1.3031e-01, PNorm = 69.4354, GNorm = 1.3191, lr_0 = 5.8792e-04
Loss = 1.4582e-01, PNorm = 69.4491, GNorm = 0.7130, lr_0 = 5.8751e-04
Loss = 1.3337e-01, PNorm = 69.4621, GNorm = 1.1041, lr_0 = 5.8711e-04
Loss = 1.5884e-01, PNorm = 69.4733, GNorm = 0.6947, lr_0 = 5.8671e-04
Loss = 1.6187e-01, PNorm = 69.4986, GNorm = 0.9082, lr_0 = 5.8631e-04
Loss = 1.6302e-01, PNorm = 69.5270, GNorm = 0.8762, lr_0 = 5.8591e-04
Loss = 1.4058e-01, PNorm = 69.5534, GNorm = 0.6059, lr_0 = 5.8550e-04
Loss = 1.4867e-01, PNorm = 69.5721, GNorm = 0.6694, lr_0 = 5.8510e-04
Loss = 1.3936e-01, PNorm = 69.5938, GNorm = 0.8536, lr_0 = 5.8470e-04
Loss = 1.5334e-01, PNorm = 69.6063, GNorm = 0.9790, lr_0 = 5.8430e-04
Loss = 1.4279e-01, PNorm = 69.6222, GNorm = 0.7636, lr_0 = 5.8390e-04
Loss = 1.4460e-01, PNorm = 69.6383, GNorm = 1.8132, lr_0 = 5.8350e-04
Loss = 1.6595e-01, PNorm = 69.6573, GNorm = 1.7194, lr_0 = 5.8310e-04
Loss = 1.7147e-01, PNorm = 69.6791, GNorm = 1.2929, lr_0 = 5.8270e-04
Loss = 1.7820e-01, PNorm = 69.6975, GNorm = 1.0309, lr_0 = 5.8230e-04
Loss = 1.3565e-01, PNorm = 69.7176, GNorm = 0.6861, lr_0 = 5.8190e-04
Loss = 1.4930e-01, PNorm = 69.7380, GNorm = 1.0485, lr_0 = 5.8151e-04
Loss = 1.5458e-01, PNorm = 69.7550, GNorm = 0.6790, lr_0 = 5.8111e-04
Loss = 1.2785e-01, PNorm = 69.7771, GNorm = 0.9173, lr_0 = 5.8071e-04
Loss = 1.4760e-01, PNorm = 69.7933, GNorm = 0.5644, lr_0 = 5.8031e-04
Loss = 1.4678e-01, PNorm = 69.8050, GNorm = 0.6790, lr_0 = 5.7991e-04
Loss = 1.4988e-01, PNorm = 69.8166, GNorm = 0.7371, lr_0 = 5.7952e-04
Loss = 1.5431e-01, PNorm = 69.8417, GNorm = 0.7513, lr_0 = 5.7912e-04
Loss = 1.5249e-01, PNorm = 69.8556, GNorm = 1.6869, lr_0 = 5.7872e-04
Loss = 1.3274e-01, PNorm = 69.8752, GNorm = 1.4975, lr_0 = 5.7833e-04
Loss = 1.3133e-01, PNorm = 69.8948, GNorm = 0.7642, lr_0 = 5.7793e-04
Loss = 1.5898e-01, PNorm = 69.9141, GNorm = 0.6777, lr_0 = 5.7753e-04
Loss = 1.4523e-01, PNorm = 69.9352, GNorm = 0.6117, lr_0 = 5.7714e-04
Loss = 1.6556e-01, PNorm = 69.9532, GNorm = 0.7645, lr_0 = 5.7674e-04
Loss = 1.3281e-01, PNorm = 69.9715, GNorm = 0.7147, lr_0 = 5.7635e-04
Loss = 1.3266e-01, PNorm = 69.9880, GNorm = 0.7902, lr_0 = 5.7595e-04
Loss = 1.6037e-01, PNorm = 70.0038, GNorm = 0.6995, lr_0 = 5.7556e-04
Loss = 1.6989e-01, PNorm = 70.0147, GNorm = 0.6295, lr_0 = 5.7516e-04
Loss = 1.5021e-01, PNorm = 70.0341, GNorm = 0.8536, lr_0 = 5.7477e-04
Loss = 1.6899e-01, PNorm = 70.0458, GNorm = 1.0021, lr_0 = 5.7438e-04
Loss = 1.6517e-01, PNorm = 70.0626, GNorm = 0.9987, lr_0 = 5.7398e-04
Loss = 1.7814e-01, PNorm = 70.0791, GNorm = 0.5671, lr_0 = 5.7359e-04
Loss = 1.6247e-01, PNorm = 70.1005, GNorm = 1.0117, lr_0 = 5.7320e-04
Loss = 1.3815e-01, PNorm = 70.1241, GNorm = 0.5924, lr_0 = 5.7280e-04
Loss = 1.6448e-01, PNorm = 70.1446, GNorm = 0.5523, lr_0 = 5.7241e-04
Loss = 1.3271e-01, PNorm = 70.1613, GNorm = 1.1065, lr_0 = 5.7202e-04
Loss = 1.3478e-01, PNorm = 70.1784, GNorm = 0.7566, lr_0 = 5.7163e-04
Loss = 1.5821e-01, PNorm = 70.1945, GNorm = 1.1101, lr_0 = 5.7124e-04
Loss = 1.5250e-01, PNorm = 70.2155, GNorm = 0.9908, lr_0 = 5.7084e-04
Loss = 1.4949e-01, PNorm = 70.2386, GNorm = 0.6836, lr_0 = 5.7045e-04
Loss = 1.5579e-01, PNorm = 70.2558, GNorm = 0.5594, lr_0 = 5.7006e-04
Loss = 1.4827e-01, PNorm = 70.2718, GNorm = 1.1625, lr_0 = 5.6967e-04
Loss = 1.4775e-01, PNorm = 70.2820, GNorm = 1.0760, lr_0 = 5.6928e-04
Loss = 1.6652e-01, PNorm = 70.2981, GNorm = 2.1772, lr_0 = 5.6889e-04
Loss = 1.5233e-01, PNorm = 70.3177, GNorm = 0.7814, lr_0 = 5.6850e-04
Loss = 1.5016e-01, PNorm = 70.3451, GNorm = 0.8297, lr_0 = 5.6811e-04
Loss = 1.4548e-01, PNorm = 70.3701, GNorm = 0.6122, lr_0 = 5.6772e-04
Loss = 1.3482e-01, PNorm = 70.3909, GNorm = 0.6210, lr_0 = 5.6733e-04
Loss = 1.5303e-01, PNorm = 70.4083, GNorm = 0.6805, lr_0 = 5.6695e-04
Loss = 1.5570e-01, PNorm = 70.4197, GNorm = 0.9473, lr_0 = 5.6656e-04
Loss = 1.5416e-01, PNorm = 70.4393, GNorm = 1.3177, lr_0 = 5.6617e-04
Loss = 1.6151e-01, PNorm = 70.4521, GNorm = 1.2455, lr_0 = 5.6578e-04
Loss = 1.3427e-01, PNorm = 70.4694, GNorm = 0.6179, lr_0 = 5.6539e-04
Loss = 1.4885e-01, PNorm = 70.4853, GNorm = 0.6772, lr_0 = 5.6501e-04
Loss = 1.5522e-01, PNorm = 70.4999, GNorm = 1.3309, lr_0 = 5.6462e-04
Loss = 1.4688e-01, PNorm = 70.5166, GNorm = 0.6244, lr_0 = 5.6423e-04
Loss = 1.6724e-01, PNorm = 70.5420, GNorm = 1.1089, lr_0 = 5.6385e-04
Loss = 1.8144e-01, PNorm = 70.5584, GNorm = 1.0741, lr_0 = 5.6346e-04
Loss = 1.5218e-01, PNorm = 70.5709, GNorm = 0.9198, lr_0 = 5.6307e-04
Loss = 1.6099e-01, PNorm = 70.5805, GNorm = 0.6115, lr_0 = 5.6269e-04
Loss = 1.5939e-01, PNorm = 70.6002, GNorm = 0.5379, lr_0 = 5.6230e-04
Validation mae = 0.248092
Epoch 9
Loss = 1.3035e-01, PNorm = 70.6191, GNorm = 0.5854, lr_0 = 5.6192e-04
Loss = 1.3212e-01, PNorm = 70.6347, GNorm = 0.8518, lr_0 = 5.6153e-04
Loss = 1.2864e-01, PNorm = 70.6521, GNorm = 0.9645, lr_0 = 5.6115e-04
Loss = 1.4266e-01, PNorm = 70.6639, GNorm = 0.5241, lr_0 = 5.6076e-04
Loss = 1.4795e-01, PNorm = 70.6836, GNorm = 0.7620, lr_0 = 5.6038e-04
Loss = 1.2927e-01, PNorm = 70.7039, GNorm = 0.6201, lr_0 = 5.6000e-04
Loss = 1.5163e-01, PNorm = 70.7209, GNorm = 1.3344, lr_0 = 5.5961e-04
Loss = 1.3413e-01, PNorm = 70.7349, GNorm = 0.7439, lr_0 = 5.5923e-04
Loss = 1.4591e-01, PNorm = 70.7574, GNorm = 0.6521, lr_0 = 5.5885e-04
Loss = 1.5777e-01, PNorm = 70.7740, GNorm = 0.7266, lr_0 = 5.5846e-04
Loss = 1.3106e-01, PNorm = 70.7956, GNorm = 1.6285, lr_0 = 5.5808e-04
Loss = 1.3425e-01, PNorm = 70.8068, GNorm = 0.9107, lr_0 = 5.5770e-04
Loss = 1.4524e-01, PNorm = 70.8201, GNorm = 0.6569, lr_0 = 5.5732e-04
Loss = 1.3043e-01, PNorm = 70.8313, GNorm = 0.6216, lr_0 = 5.5693e-04
Loss = 1.3505e-01, PNorm = 70.8427, GNorm = 0.7545, lr_0 = 5.5655e-04
Loss = 1.4095e-01, PNorm = 70.8619, GNorm = 0.7315, lr_0 = 5.5617e-04
Loss = 1.6540e-01, PNorm = 70.8811, GNorm = 1.1369, lr_0 = 5.5579e-04
Loss = 1.4308e-01, PNorm = 70.8994, GNorm = 1.0665, lr_0 = 5.5541e-04
Loss = 1.3051e-01, PNorm = 70.9152, GNorm = 0.6009, lr_0 = 5.5503e-04
Loss = 1.3461e-01, PNorm = 70.9319, GNorm = 0.8970, lr_0 = 5.5465e-04
Loss = 1.3374e-01, PNorm = 70.9453, GNorm = 0.7153, lr_0 = 5.5427e-04
Loss = 1.4186e-01, PNorm = 70.9530, GNorm = 0.6439, lr_0 = 5.5389e-04
Loss = 1.4545e-01, PNorm = 70.9618, GNorm = 0.9912, lr_0 = 5.5351e-04
Loss = 1.2746e-01, PNorm = 70.9699, GNorm = 0.6651, lr_0 = 5.5313e-04
Loss = 1.3106e-01, PNorm = 70.9876, GNorm = 1.1777, lr_0 = 5.5275e-04
Loss = 1.3355e-01, PNorm = 71.0050, GNorm = 0.6350, lr_0 = 5.5237e-04
Loss = 1.3576e-01, PNorm = 71.0247, GNorm = 0.8561, lr_0 = 5.5199e-04
Loss = 1.3373e-01, PNorm = 71.0397, GNorm = 1.1322, lr_0 = 5.5162e-04
Loss = 1.3252e-01, PNorm = 71.0599, GNorm = 0.4754, lr_0 = 5.5124e-04
Loss = 1.3023e-01, PNorm = 71.0781, GNorm = 0.6958, lr_0 = 5.5086e-04
Loss = 1.5213e-01, PNorm = 71.0941, GNorm = 1.3038, lr_0 = 5.5048e-04
Loss = 1.2688e-01, PNorm = 71.1150, GNorm = 0.7183, lr_0 = 5.5011e-04
Loss = 1.3773e-01, PNorm = 71.1365, GNorm = 1.0673, lr_0 = 5.4973e-04
Loss = 1.5362e-01, PNorm = 71.1566, GNorm = 0.9236, lr_0 = 5.4935e-04
Loss = 1.5637e-01, PNorm = 71.1725, GNorm = 1.9611, lr_0 = 5.4898e-04
Loss = 1.6338e-01, PNorm = 71.1883, GNorm = 1.0105, lr_0 = 5.4860e-04
Loss = 1.5629e-01, PNorm = 71.2054, GNorm = 0.7846, lr_0 = 5.4822e-04
Loss = 1.3494e-01, PNorm = 71.2250, GNorm = 0.8049, lr_0 = 5.4785e-04
Loss = 1.4188e-01, PNorm = 71.2402, GNorm = 0.8255, lr_0 = 5.4747e-04
Loss = 1.4737e-01, PNorm = 71.2580, GNorm = 1.1851, lr_0 = 5.4710e-04
Loss = 1.3580e-01, PNorm = 71.2777, GNorm = 0.5490, lr_0 = 5.4672e-04
Loss = 1.3681e-01, PNorm = 71.2957, GNorm = 0.5519, lr_0 = 5.4635e-04
Loss = 1.3589e-01, PNorm = 71.3146, GNorm = 0.6977, lr_0 = 5.4597e-04
Loss = 1.4993e-01, PNorm = 71.3295, GNorm = 0.6312, lr_0 = 5.4560e-04
Loss = 1.2368e-01, PNorm = 71.3447, GNorm = 1.0903, lr_0 = 5.4523e-04
Loss = 1.3490e-01, PNorm = 71.3611, GNorm = 0.6022, lr_0 = 5.4485e-04
Loss = 1.6466e-01, PNorm = 71.3780, GNorm = 0.8890, lr_0 = 5.4448e-04
Loss = 1.3933e-01, PNorm = 71.3926, GNorm = 0.6876, lr_0 = 5.4411e-04
Loss = 1.4427e-01, PNorm = 71.4084, GNorm = 0.8704, lr_0 = 5.4373e-04
Loss = 1.4172e-01, PNorm = 71.4288, GNorm = 0.9886, lr_0 = 5.4336e-04
Loss = 1.4671e-01, PNorm = 71.4491, GNorm = 0.8139, lr_0 = 5.4299e-04
Loss = 1.4412e-01, PNorm = 71.4685, GNorm = 0.5497, lr_0 = 5.4262e-04
Loss = 1.4511e-01, PNorm = 71.4915, GNorm = 0.6669, lr_0 = 5.4225e-04
Loss = 1.6219e-01, PNorm = 71.5035, GNorm = 0.6244, lr_0 = 5.4187e-04
Loss = 1.4397e-01, PNorm = 71.5204, GNorm = 1.2226, lr_0 = 5.4150e-04
Loss = 1.3834e-01, PNorm = 71.5341, GNorm = 0.7437, lr_0 = 5.4113e-04
Loss = 1.3086e-01, PNorm = 71.5473, GNorm = 0.7510, lr_0 = 5.4076e-04
Loss = 1.3314e-01, PNorm = 71.5573, GNorm = 0.4712, lr_0 = 5.4039e-04
Loss = 1.5190e-01, PNorm = 71.5792, GNorm = 0.9975, lr_0 = 5.4002e-04
Loss = 1.3342e-01, PNorm = 71.6059, GNorm = 0.6030, lr_0 = 5.3965e-04
Loss = 1.3706e-01, PNorm = 71.6252, GNorm = 0.5550, lr_0 = 5.3928e-04
Loss = 1.3853e-01, PNorm = 71.6405, GNorm = 0.5892, lr_0 = 5.3891e-04
Loss = 1.2658e-01, PNorm = 71.6537, GNorm = 0.6287, lr_0 = 5.3854e-04
Loss = 1.6067e-01, PNorm = 71.6650, GNorm = 1.2224, lr_0 = 5.3817e-04
Loss = 1.3442e-01, PNorm = 71.6779, GNorm = 0.9060, lr_0 = 5.3781e-04
Loss = 1.4717e-01, PNorm = 71.6921, GNorm = 0.5163, lr_0 = 5.3744e-04
Loss = 1.2776e-01, PNorm = 71.7079, GNorm = 0.5799, lr_0 = 5.3707e-04
Loss = 1.5735e-01, PNorm = 71.7273, GNorm = 0.8219, lr_0 = 5.3670e-04
Loss = 1.5343e-01, PNorm = 71.7457, GNorm = 0.8994, lr_0 = 5.3633e-04
Loss = 1.5486e-01, PNorm = 71.7665, GNorm = 0.8854, lr_0 = 5.3597e-04
Loss = 1.3358e-01, PNorm = 71.7825, GNorm = 0.4868, lr_0 = 5.3560e-04
Loss = 1.3933e-01, PNorm = 71.7952, GNorm = 0.7326, lr_0 = 5.3523e-04
Loss = 1.3027e-01, PNorm = 71.8048, GNorm = 0.5152, lr_0 = 5.3486e-04
Loss = 1.3911e-01, PNorm = 71.8155, GNorm = 0.8067, lr_0 = 5.3450e-04
Loss = 1.4771e-01, PNorm = 71.8256, GNorm = 1.4544, lr_0 = 5.3413e-04
Loss = 1.3911e-01, PNorm = 71.8437, GNorm = 0.5626, lr_0 = 5.3377e-04
Loss = 1.4482e-01, PNorm = 71.8658, GNorm = 1.0798, lr_0 = 5.3340e-04
Loss = 1.4233e-01, PNorm = 71.8819, GNorm = 0.4791, lr_0 = 5.3304e-04
Loss = 1.3591e-01, PNorm = 71.8997, GNorm = 0.6383, lr_0 = 5.3267e-04
Loss = 1.3456e-01, PNorm = 71.9121, GNorm = 0.7936, lr_0 = 5.3231e-04
Loss = 1.4086e-01, PNorm = 71.9174, GNorm = 0.5766, lr_0 = 5.3194e-04
Loss = 1.4094e-01, PNorm = 71.9285, GNorm = 0.6802, lr_0 = 5.3158e-04
Loss = 1.4200e-01, PNorm = 71.9468, GNorm = 0.9798, lr_0 = 5.3121e-04
Loss = 1.3373e-01, PNorm = 71.9690, GNorm = 0.8673, lr_0 = 5.3085e-04
Loss = 1.4439e-01, PNorm = 71.9873, GNorm = 0.7613, lr_0 = 5.3048e-04
Loss = 1.2058e-01, PNorm = 72.0059, GNorm = 0.5128, lr_0 = 5.3012e-04
Loss = 1.4817e-01, PNorm = 72.0207, GNorm = 0.8779, lr_0 = 5.2976e-04
Loss = 1.5047e-01, PNorm = 72.0397, GNorm = 0.9089, lr_0 = 5.2939e-04
Loss = 1.2828e-01, PNorm = 72.0515, GNorm = 0.9075, lr_0 = 5.2903e-04
Loss = 1.3015e-01, PNorm = 72.0697, GNorm = 0.6464, lr_0 = 5.2867e-04
Loss = 1.5876e-01, PNorm = 72.0794, GNorm = 1.5812, lr_0 = 5.2831e-04
Loss = 1.3949e-01, PNorm = 72.0942, GNorm = 0.7327, lr_0 = 5.2795e-04
Loss = 1.5205e-01, PNorm = 72.1153, GNorm = 0.7932, lr_0 = 5.2758e-04
Loss = 1.4956e-01, PNorm = 72.1301, GNorm = 1.2228, lr_0 = 5.2722e-04
Loss = 1.2145e-01, PNorm = 72.1411, GNorm = 0.5195, lr_0 = 5.2686e-04
Loss = 1.5274e-01, PNorm = 72.1524, GNorm = 0.8790, lr_0 = 5.2650e-04
Loss = 1.3976e-01, PNorm = 72.1674, GNorm = 0.7815, lr_0 = 5.2614e-04
Loss = 1.3968e-01, PNorm = 72.1873, GNorm = 0.8985, lr_0 = 5.2578e-04
Loss = 1.6075e-01, PNorm = 72.2000, GNorm = 0.7097, lr_0 = 5.2542e-04
Loss = 1.4846e-01, PNorm = 72.2199, GNorm = 0.6729, lr_0 = 5.2506e-04
Loss = 1.4709e-01, PNorm = 72.2377, GNorm = 0.7278, lr_0 = 5.2470e-04
Loss = 1.2144e-01, PNorm = 72.2543, GNorm = 0.7725, lr_0 = 5.2434e-04
Loss = 1.4103e-01, PNorm = 72.2690, GNorm = 0.8483, lr_0 = 5.2398e-04
Loss = 1.4637e-01, PNorm = 72.2880, GNorm = 1.2936, lr_0 = 5.2362e-04
Loss = 1.3098e-01, PNorm = 72.3094, GNorm = 0.6895, lr_0 = 5.2326e-04
Loss = 1.2014e-01, PNorm = 72.3321, GNorm = 0.6084, lr_0 = 5.2290e-04
Loss = 1.4975e-01, PNorm = 72.3445, GNorm = 0.7205, lr_0 = 5.2255e-04
Loss = 1.4652e-01, PNorm = 72.3563, GNorm = 1.1385, lr_0 = 5.2219e-04
Loss = 1.3806e-01, PNorm = 72.3669, GNorm = 0.7975, lr_0 = 5.2183e-04
Loss = 1.6328e-01, PNorm = 72.3826, GNorm = 0.9814, lr_0 = 5.2147e-04
Loss = 1.4472e-01, PNorm = 72.3882, GNorm = 1.2963, lr_0 = 5.2112e-04
Loss = 1.7371e-01, PNorm = 72.4040, GNorm = 0.6877, lr_0 = 5.2076e-04
Loss = 1.5828e-01, PNorm = 72.4226, GNorm = 0.6662, lr_0 = 5.2040e-04
Loss = 1.3237e-01, PNorm = 72.4377, GNorm = 0.7120, lr_0 = 5.2005e-04
Loss = 1.2385e-01, PNorm = 72.4532, GNorm = 0.6584, lr_0 = 5.1969e-04
Loss = 1.3679e-01, PNorm = 72.4687, GNorm = 0.4775, lr_0 = 5.1933e-04
Loss = 1.4610e-01, PNorm = 72.4844, GNorm = 0.8480, lr_0 = 5.1898e-04
Loss = 1.3816e-01, PNorm = 72.5018, GNorm = 0.6769, lr_0 = 5.1862e-04
Loss = 1.3779e-01, PNorm = 72.5186, GNorm = 0.8075, lr_0 = 5.1827e-04
Loss = 1.4560e-01, PNorm = 72.5360, GNorm = 0.6472, lr_0 = 5.1791e-04
Validation mae = 0.244647
Epoch 10
Loss = 1.2341e-01, PNorm = 72.5583, GNorm = 0.8091, lr_0 = 5.1756e-04
Loss = 1.3572e-01, PNorm = 72.5736, GNorm = 0.4631, lr_0 = 5.1720e-04
Loss = 1.3073e-01, PNorm = 72.5853, GNorm = 0.6278, lr_0 = 5.1685e-04
Loss = 1.4161e-01, PNorm = 72.5979, GNorm = 0.5617, lr_0 = 5.1649e-04
Loss = 1.5175e-01, PNorm = 72.6142, GNorm = 0.6678, lr_0 = 5.1614e-04
Loss = 1.2751e-01, PNorm = 72.6316, GNorm = 0.6280, lr_0 = 5.1579e-04
Loss = 1.1675e-01, PNorm = 72.6483, GNorm = 0.7850, lr_0 = 5.1543e-04
Loss = 1.1776e-01, PNorm = 72.6624, GNorm = 0.6905, lr_0 = 5.1508e-04
Loss = 1.2239e-01, PNorm = 72.6803, GNorm = 0.5315, lr_0 = 5.1473e-04
Loss = 1.1565e-01, PNorm = 72.6983, GNorm = 0.5542, lr_0 = 5.1437e-04
Loss = 1.3608e-01, PNorm = 72.7147, GNorm = 0.7434, lr_0 = 5.1402e-04
Loss = 1.6599e-01, PNorm = 72.7337, GNorm = 0.7925, lr_0 = 5.1367e-04
Loss = 1.4261e-01, PNorm = 72.7588, GNorm = 0.7609, lr_0 = 5.1332e-04
Loss = 1.4432e-01, PNorm = 72.7792, GNorm = 0.9046, lr_0 = 5.1297e-04
Loss = 1.3822e-01, PNorm = 72.7993, GNorm = 1.1216, lr_0 = 5.1262e-04
Loss = 1.2215e-01, PNorm = 72.8133, GNorm = 0.6383, lr_0 = 5.1226e-04
Loss = 1.4164e-01, PNorm = 72.8249, GNorm = 0.7452, lr_0 = 5.1191e-04
Loss = 1.3119e-01, PNorm = 72.8367, GNorm = 0.5327, lr_0 = 5.1156e-04
Loss = 1.2234e-01, PNorm = 72.8524, GNorm = 0.9979, lr_0 = 5.1121e-04
Loss = 1.2853e-01, PNorm = 72.8666, GNorm = 0.6011, lr_0 = 5.1086e-04
Loss = 1.4538e-01, PNorm = 72.8815, GNorm = 0.9923, lr_0 = 5.1051e-04
Loss = 1.3221e-01, PNorm = 72.8927, GNorm = 1.0229, lr_0 = 5.1016e-04
Loss = 1.5082e-01, PNorm = 72.9134, GNorm = 1.0422, lr_0 = 5.0981e-04
Loss = 1.3232e-01, PNorm = 72.9307, GNorm = 0.6935, lr_0 = 5.0946e-04
Loss = 1.3321e-01, PNorm = 72.9437, GNorm = 0.5733, lr_0 = 5.0911e-04
Loss = 1.3197e-01, PNorm = 72.9592, GNorm = 0.5944, lr_0 = 5.0877e-04
Loss = 1.4098e-01, PNorm = 72.9798, GNorm = 0.7202, lr_0 = 5.0842e-04
Loss = 1.3750e-01, PNorm = 73.0003, GNorm = 0.7637, lr_0 = 5.0807e-04
Loss = 1.4522e-01, PNorm = 73.0136, GNorm = 0.7264, lr_0 = 5.0772e-04
Loss = 1.3532e-01, PNorm = 73.0246, GNorm = 1.7998, lr_0 = 5.0737e-04
Loss = 1.4141e-01, PNorm = 73.0381, GNorm = 0.6178, lr_0 = 5.0703e-04
Loss = 1.3054e-01, PNorm = 73.0522, GNorm = 0.6630, lr_0 = 5.0668e-04
Loss = 1.4879e-01, PNorm = 73.0623, GNorm = 0.6689, lr_0 = 5.0633e-04
Loss = 1.3178e-01, PNorm = 73.0789, GNorm = 0.5833, lr_0 = 5.0598e-04
Loss = 1.2990e-01, PNorm = 73.0917, GNorm = 0.8148, lr_0 = 5.0564e-04
Loss = 1.2973e-01, PNorm = 73.1064, GNorm = 0.7708, lr_0 = 5.0529e-04
Loss = 1.2888e-01, PNorm = 73.1236, GNorm = 0.8463, lr_0 = 5.0494e-04
Loss = 1.3092e-01, PNorm = 73.1444, GNorm = 0.6741, lr_0 = 5.0460e-04
Loss = 1.2557e-01, PNorm = 73.1496, GNorm = 0.8777, lr_0 = 5.0425e-04
Loss = 1.1155e-01, PNorm = 73.1619, GNorm = 0.8094, lr_0 = 5.0391e-04
Loss = 1.4998e-01, PNorm = 73.1753, GNorm = 0.5057, lr_0 = 5.0356e-04
Loss = 1.4280e-01, PNorm = 73.1949, GNorm = 0.5807, lr_0 = 5.0322e-04
Loss = 1.4205e-01, PNorm = 73.2141, GNorm = 0.6908, lr_0 = 5.0287e-04
Loss = 1.2900e-01, PNorm = 73.2307, GNorm = 0.6361, lr_0 = 5.0253e-04
Loss = 1.3046e-01, PNorm = 73.2431, GNorm = 1.1556, lr_0 = 5.0218e-04
Loss = 1.4605e-01, PNorm = 73.2543, GNorm = 1.0890, lr_0 = 5.0184e-04
Loss = 1.4013e-01, PNorm = 73.2676, GNorm = 0.6772, lr_0 = 5.0150e-04
Loss = 1.1783e-01, PNorm = 73.2830, GNorm = 1.5746, lr_0 = 5.0115e-04
Loss = 1.4434e-01, PNorm = 73.3004, GNorm = 0.9277, lr_0 = 5.0081e-04
Loss = 1.1754e-01, PNorm = 73.3163, GNorm = 0.6029, lr_0 = 5.0047e-04
Loss = 1.2567e-01, PNorm = 73.3284, GNorm = 1.0442, lr_0 = 5.0012e-04
Loss = 1.5008e-01, PNorm = 73.3407, GNorm = 0.8190, lr_0 = 4.9978e-04
Loss = 1.3271e-01, PNorm = 73.3542, GNorm = 0.5967, lr_0 = 4.9944e-04
Loss = 1.2970e-01, PNorm = 73.3715, GNorm = 0.5502, lr_0 = 4.9910e-04
Loss = 1.2517e-01, PNorm = 73.3838, GNorm = 0.6086, lr_0 = 4.9875e-04
Loss = 1.3230e-01, PNorm = 73.3940, GNorm = 0.8078, lr_0 = 4.9841e-04
Loss = 1.4791e-01, PNorm = 73.4110, GNorm = 1.0064, lr_0 = 4.9807e-04
Loss = 1.4183e-01, PNorm = 73.4238, GNorm = 1.0026, lr_0 = 4.9773e-04
Loss = 1.3674e-01, PNorm = 73.4437, GNorm = 0.4627, lr_0 = 4.9739e-04
Loss = 1.0818e-01, PNorm = 73.4619, GNorm = 1.0974, lr_0 = 4.9705e-04
Loss = 1.3298e-01, PNorm = 73.4763, GNorm = 0.5451, lr_0 = 4.9671e-04
Loss = 1.2433e-01, PNorm = 73.4854, GNorm = 0.5335, lr_0 = 4.9637e-04
Loss = 1.4140e-01, PNorm = 73.4944, GNorm = 0.5902, lr_0 = 4.9603e-04
Loss = 1.4307e-01, PNorm = 73.5072, GNorm = 0.6623, lr_0 = 4.9569e-04
Loss = 1.3693e-01, PNorm = 73.5262, GNorm = 0.7170, lr_0 = 4.9535e-04
Loss = 1.4377e-01, PNorm = 73.5407, GNorm = 0.7380, lr_0 = 4.9501e-04
Loss = 1.2662e-01, PNorm = 73.5533, GNorm = 0.7024, lr_0 = 4.9467e-04
Loss = 1.1209e-01, PNorm = 73.5705, GNorm = 0.6796, lr_0 = 4.9433e-04
Loss = 1.4228e-01, PNorm = 73.5875, GNorm = 0.6720, lr_0 = 4.9399e-04
Loss = 1.4857e-01, PNorm = 73.6011, GNorm = 0.6814, lr_0 = 4.9365e-04
Loss = 1.3955e-01, PNorm = 73.6174, GNorm = 0.8854, lr_0 = 4.9332e-04
Loss = 1.1910e-01, PNorm = 73.6338, GNorm = 1.1656, lr_0 = 4.9298e-04
Loss = 1.4026e-01, PNorm = 73.6530, GNorm = 1.1869, lr_0 = 4.9264e-04
Loss = 1.6907e-01, PNorm = 73.6612, GNorm = 0.6280, lr_0 = 4.9230e-04
Loss = 1.1918e-01, PNorm = 73.6809, GNorm = 0.9300, lr_0 = 4.9197e-04
Loss = 1.4690e-01, PNorm = 73.6936, GNorm = 1.1238, lr_0 = 4.9163e-04
Loss = 1.0676e-01, PNorm = 73.7075, GNorm = 0.6442, lr_0 = 4.9129e-04
Loss = 1.3391e-01, PNorm = 73.7220, GNorm = 0.7156, lr_0 = 4.9095e-04
Loss = 1.4087e-01, PNorm = 73.7408, GNorm = 0.9928, lr_0 = 4.9062e-04
Loss = 1.2144e-01, PNorm = 73.7636, GNorm = 0.6719, lr_0 = 4.9028e-04
Loss = 1.3887e-01, PNorm = 73.7752, GNorm = 0.6817, lr_0 = 4.8995e-04
Loss = 1.3238e-01, PNorm = 73.7888, GNorm = 0.4890, lr_0 = 4.8961e-04
Loss = 1.4265e-01, PNorm = 73.8050, GNorm = 0.6416, lr_0 = 4.8928e-04
Loss = 1.2692e-01, PNorm = 73.8207, GNorm = 1.7474, lr_0 = 4.8894e-04
Loss = 1.3256e-01, PNorm = 73.8383, GNorm = 0.6682, lr_0 = 4.8861e-04
Loss = 1.5955e-01, PNorm = 73.8606, GNorm = 0.7287, lr_0 = 4.8827e-04
Loss = 1.5519e-01, PNorm = 73.8786, GNorm = 0.6618, lr_0 = 4.8794e-04
Loss = 1.6573e-01, PNorm = 73.9016, GNorm = 0.7240, lr_0 = 4.8760e-04
Loss = 1.4824e-01, PNorm = 73.9199, GNorm = 0.6567, lr_0 = 4.8727e-04
Loss = 1.2974e-01, PNorm = 73.9315, GNorm = 1.0869, lr_0 = 4.8693e-04
Loss = 1.4122e-01, PNorm = 73.9437, GNorm = 1.3267, lr_0 = 4.8660e-04
Loss = 1.2969e-01, PNorm = 73.9568, GNorm = 0.7334, lr_0 = 4.8627e-04
Loss = 1.6211e-01, PNorm = 73.9694, GNorm = 0.7033, lr_0 = 4.8593e-04
Loss = 1.3532e-01, PNorm = 73.9797, GNorm = 0.9425, lr_0 = 4.8560e-04
Loss = 1.4066e-01, PNorm = 74.0010, GNorm = 0.9348, lr_0 = 4.8527e-04
Loss = 1.3343e-01, PNorm = 74.0169, GNorm = 0.7355, lr_0 = 4.8494e-04
Loss = 1.7177e-01, PNorm = 74.0303, GNorm = 0.8940, lr_0 = 4.8460e-04
Loss = 1.4220e-01, PNorm = 74.0447, GNorm = 1.0695, lr_0 = 4.8427e-04
Loss = 1.3333e-01, PNorm = 74.0583, GNorm = 1.0096, lr_0 = 4.8394e-04
Loss = 1.2072e-01, PNorm = 74.0718, GNorm = 0.6098, lr_0 = 4.8361e-04
Loss = 1.2168e-01, PNorm = 74.0854, GNorm = 0.6786, lr_0 = 4.8328e-04
Loss = 1.2846e-01, PNorm = 74.0987, GNorm = 1.0595, lr_0 = 4.8295e-04
Loss = 1.2940e-01, PNorm = 74.1110, GNorm = 0.9862, lr_0 = 4.8262e-04
Loss = 1.3894e-01, PNorm = 74.1300, GNorm = 0.6456, lr_0 = 4.8228e-04
Loss = 1.3415e-01, PNorm = 74.1437, GNorm = 0.7051, lr_0 = 4.8195e-04
Loss = 1.2808e-01, PNorm = 74.1556, GNorm = 0.7199, lr_0 = 4.8162e-04
Loss = 1.3950e-01, PNorm = 74.1648, GNorm = 0.5835, lr_0 = 4.8129e-04
Loss = 1.3423e-01, PNorm = 74.1768, GNorm = 0.9007, lr_0 = 4.8096e-04
Loss = 1.4736e-01, PNorm = 74.1951, GNorm = 0.8546, lr_0 = 4.8064e-04
Loss = 1.3927e-01, PNorm = 74.2139, GNorm = 0.7215, lr_0 = 4.8031e-04
Loss = 1.2760e-01, PNorm = 74.2353, GNorm = 0.7455, lr_0 = 4.7998e-04
Loss = 1.3538e-01, PNorm = 74.2470, GNorm = 0.6658, lr_0 = 4.7965e-04
Loss = 1.1825e-01, PNorm = 74.2567, GNorm = 0.5273, lr_0 = 4.7932e-04
Loss = 1.5290e-01, PNorm = 74.2697, GNorm = 1.0554, lr_0 = 4.7899e-04
Loss = 1.2938e-01, PNorm = 74.2817, GNorm = 0.4867, lr_0 = 4.7866e-04
Loss = 1.3788e-01, PNorm = 74.2875, GNorm = 1.2997, lr_0 = 4.7833e-04
Loss = 1.3061e-01, PNorm = 74.2997, GNorm = 0.5318, lr_0 = 4.7801e-04
Loss = 1.2803e-01, PNorm = 74.3119, GNorm = 0.5913, lr_0 = 4.7768e-04
Loss = 1.4501e-01, PNorm = 74.3270, GNorm = 0.6821, lr_0 = 4.7735e-04
Loss = 1.1708e-01, PNorm = 74.3442, GNorm = 1.0657, lr_0 = 4.7703e-04
Validation mae = 0.236109
Epoch 11
Loss = 1.0155e-01, PNorm = 74.3526, GNorm = 0.6666, lr_0 = 4.7670e-04
Loss = 1.3387e-01, PNorm = 74.3690, GNorm = 0.7463, lr_0 = 4.7637e-04
Loss = 1.2424e-01, PNorm = 74.3848, GNorm = 0.8575, lr_0 = 4.7605e-04
Loss = 1.1891e-01, PNorm = 74.3944, GNorm = 0.7606, lr_0 = 4.7572e-04
Loss = 1.3071e-01, PNorm = 74.4057, GNorm = 1.0914, lr_0 = 4.7539e-04
Loss = 1.2714e-01, PNorm = 74.4151, GNorm = 0.6034, lr_0 = 4.7507e-04
Loss = 1.2409e-01, PNorm = 74.4282, GNorm = 1.1117, lr_0 = 4.7474e-04
Loss = 1.4578e-01, PNorm = 74.4420, GNorm = 1.1608, lr_0 = 4.7442e-04
Loss = 1.4019e-01, PNorm = 74.4658, GNorm = 0.8924, lr_0 = 4.7409e-04
Loss = 1.0330e-01, PNorm = 74.4794, GNorm = 0.6822, lr_0 = 4.7377e-04
Loss = 1.1390e-01, PNorm = 74.4922, GNorm = 0.7560, lr_0 = 4.7344e-04
Loss = 1.0162e-01, PNorm = 74.5035, GNorm = 0.6284, lr_0 = 4.7312e-04
Loss = 1.1695e-01, PNorm = 74.5188, GNorm = 0.6519, lr_0 = 4.7279e-04
Loss = 1.1963e-01, PNorm = 74.5240, GNorm = 1.1981, lr_0 = 4.7247e-04
Loss = 1.3532e-01, PNorm = 74.5323, GNorm = 0.5753, lr_0 = 4.7215e-04
Loss = 1.1787e-01, PNorm = 74.5466, GNorm = 0.6533, lr_0 = 4.7182e-04
Loss = 1.2236e-01, PNorm = 74.5569, GNorm = 0.7491, lr_0 = 4.7150e-04
Loss = 1.2296e-01, PNorm = 74.5727, GNorm = 0.8134, lr_0 = 4.7118e-04
Loss = 1.1925e-01, PNorm = 74.5776, GNorm = 0.5542, lr_0 = 4.7085e-04
Loss = 1.3204e-01, PNorm = 74.5878, GNorm = 0.6747, lr_0 = 4.7053e-04
Loss = 1.1991e-01, PNorm = 74.6037, GNorm = 0.7451, lr_0 = 4.7021e-04
Loss = 1.2547e-01, PNorm = 74.6169, GNorm = 1.0701, lr_0 = 4.6989e-04
Loss = 1.3623e-01, PNorm = 74.6306, GNorm = 0.6261, lr_0 = 4.6957e-04
Loss = 1.3073e-01, PNorm = 74.6453, GNorm = 0.7563, lr_0 = 4.6924e-04
Loss = 1.0690e-01, PNorm = 74.6598, GNorm = 0.6396, lr_0 = 4.6892e-04
Loss = 1.2368e-01, PNorm = 74.6751, GNorm = 0.8096, lr_0 = 4.6860e-04
Loss = 1.3009e-01, PNorm = 74.6919, GNorm = 0.8746, lr_0 = 4.6828e-04
Loss = 1.1909e-01, PNorm = 74.7063, GNorm = 0.5163, lr_0 = 4.6796e-04
Loss = 1.3009e-01, PNorm = 74.7212, GNorm = 0.9397, lr_0 = 4.6764e-04
Loss = 1.2239e-01, PNorm = 74.7365, GNorm = 0.7801, lr_0 = 4.6732e-04
Loss = 1.5104e-01, PNorm = 74.7488, GNorm = 0.8944, lr_0 = 4.6700e-04
Loss = 1.2722e-01, PNorm = 74.7577, GNorm = 0.5420, lr_0 = 4.6668e-04
Loss = 1.5022e-01, PNorm = 74.7705, GNorm = 0.7713, lr_0 = 4.6636e-04
Loss = 1.3633e-01, PNorm = 74.7833, GNorm = 0.7708, lr_0 = 4.6604e-04
Loss = 1.2510e-01, PNorm = 74.7945, GNorm = 0.7139, lr_0 = 4.6572e-04
Loss = 1.2068e-01, PNorm = 74.8053, GNorm = 0.6250, lr_0 = 4.6540e-04
Loss = 1.1622e-01, PNorm = 74.8175, GNorm = 0.9539, lr_0 = 4.6508e-04
Loss = 1.3475e-01, PNorm = 74.8350, GNorm = 0.6464, lr_0 = 4.6476e-04
Loss = 1.3153e-01, PNorm = 74.8498, GNorm = 0.8552, lr_0 = 4.6445e-04
Loss = 1.3053e-01, PNorm = 74.8579, GNorm = 0.5874, lr_0 = 4.6413e-04
Loss = 1.1592e-01, PNorm = 74.8672, GNorm = 0.8420, lr_0 = 4.6381e-04
Loss = 1.2964e-01, PNorm = 74.8815, GNorm = 0.7152, lr_0 = 4.6349e-04
Loss = 1.3513e-01, PNorm = 74.8953, GNorm = 0.5422, lr_0 = 4.6317e-04
Loss = 1.2452e-01, PNorm = 74.9067, GNorm = 1.2439, lr_0 = 4.6286e-04
Loss = 1.3086e-01, PNorm = 74.9224, GNorm = 0.7381, lr_0 = 4.6254e-04
Loss = 1.4802e-01, PNorm = 74.9437, GNorm = 1.0922, lr_0 = 4.6222e-04
Loss = 1.2780e-01, PNorm = 74.9641, GNorm = 1.1145, lr_0 = 4.6191e-04
Loss = 1.3096e-01, PNorm = 74.9793, GNorm = 0.8791, lr_0 = 4.6159e-04
Loss = 1.2115e-01, PNorm = 74.9892, GNorm = 0.9001, lr_0 = 4.6127e-04
Loss = 1.3790e-01, PNorm = 75.0015, GNorm = 0.7305, lr_0 = 4.6096e-04
Loss = 1.1701e-01, PNorm = 75.0140, GNorm = 0.8594, lr_0 = 4.6064e-04
Loss = 1.2953e-01, PNorm = 75.0285, GNorm = 1.1060, lr_0 = 4.6033e-04
Loss = 1.4821e-01, PNorm = 75.0352, GNorm = 0.6996, lr_0 = 4.6001e-04
Loss = 1.1996e-01, PNorm = 75.0532, GNorm = 0.7757, lr_0 = 4.5970e-04
Loss = 1.0880e-01, PNorm = 75.0679, GNorm = 0.6045, lr_0 = 4.5938e-04
Loss = 1.2806e-01, PNorm = 75.0810, GNorm = 0.4483, lr_0 = 4.5907e-04
Loss = 1.2922e-01, PNorm = 75.0861, GNorm = 0.5472, lr_0 = 4.5875e-04
Loss = 1.4037e-01, PNorm = 75.1003, GNorm = 0.6129, lr_0 = 4.5844e-04
Loss = 1.2142e-01, PNorm = 75.1178, GNorm = 0.5525, lr_0 = 4.5812e-04
Loss = 1.3122e-01, PNorm = 75.1308, GNorm = 0.9658, lr_0 = 4.5781e-04
Loss = 1.3091e-01, PNorm = 75.1483, GNorm = 0.9442, lr_0 = 4.5750e-04
Loss = 1.1518e-01, PNorm = 75.1649, GNorm = 0.4916, lr_0 = 4.5718e-04
Loss = 1.1581e-01, PNorm = 75.1828, GNorm = 0.9706, lr_0 = 4.5687e-04
Loss = 1.3341e-01, PNorm = 75.1965, GNorm = 0.6074, lr_0 = 4.5656e-04
Loss = 1.2793e-01, PNorm = 75.2070, GNorm = 0.6333, lr_0 = 4.5624e-04
Loss = 1.2759e-01, PNorm = 75.2197, GNorm = 0.9433, lr_0 = 4.5593e-04
Loss = 1.1847e-01, PNorm = 75.2358, GNorm = 0.6196, lr_0 = 4.5562e-04
Loss = 1.2590e-01, PNorm = 75.2513, GNorm = 0.6704, lr_0 = 4.5531e-04
Loss = 1.2226e-01, PNorm = 75.2629, GNorm = 0.6610, lr_0 = 4.5499e-04
Loss = 1.0685e-01, PNorm = 75.2705, GNorm = 0.7229, lr_0 = 4.5468e-04
Loss = 1.2808e-01, PNorm = 75.2757, GNorm = 0.5637, lr_0 = 4.5437e-04
Loss = 1.3996e-01, PNorm = 75.2894, GNorm = 0.9180, lr_0 = 4.5406e-04
Loss = 1.2839e-01, PNorm = 75.3028, GNorm = 0.7641, lr_0 = 4.5375e-04
Loss = 1.4280e-01, PNorm = 75.3114, GNorm = 0.7640, lr_0 = 4.5344e-04
Loss = 1.1174e-01, PNorm = 75.3249, GNorm = 0.5861, lr_0 = 4.5313e-04
Loss = 1.2527e-01, PNorm = 75.3383, GNorm = 0.4586, lr_0 = 4.5282e-04
Loss = 1.3112e-01, PNorm = 75.3493, GNorm = 0.5522, lr_0 = 4.5251e-04
Loss = 1.1938e-01, PNorm = 75.3619, GNorm = 0.4993, lr_0 = 4.5220e-04
Loss = 1.3297e-01, PNorm = 75.3707, GNorm = 0.8658, lr_0 = 4.5189e-04
Loss = 1.2328e-01, PNorm = 75.3832, GNorm = 0.7581, lr_0 = 4.5158e-04
Loss = 1.2306e-01, PNorm = 75.3954, GNorm = 0.7400, lr_0 = 4.5127e-04
Loss = 1.1491e-01, PNorm = 75.4086, GNorm = 1.2192, lr_0 = 4.5096e-04
Loss = 1.1516e-01, PNorm = 75.4172, GNorm = 0.7017, lr_0 = 4.5065e-04
Loss = 1.2714e-01, PNorm = 75.4280, GNorm = 0.6398, lr_0 = 4.5034e-04
Loss = 1.4297e-01, PNorm = 75.4403, GNorm = 0.9388, lr_0 = 4.5003e-04
Loss = 1.2569e-01, PNorm = 75.4576, GNorm = 0.8244, lr_0 = 4.4972e-04
Loss = 1.2251e-01, PNorm = 75.4737, GNorm = 0.4044, lr_0 = 4.4942e-04
Loss = 1.3006e-01, PNorm = 75.4892, GNorm = 0.8409, lr_0 = 4.4911e-04
Loss = 1.1500e-01, PNorm = 75.5002, GNorm = 0.6665, lr_0 = 4.4880e-04
Loss = 1.4132e-01, PNorm = 75.5074, GNorm = 0.6482, lr_0 = 4.4849e-04
Loss = 1.3194e-01, PNorm = 75.5176, GNorm = 0.7105, lr_0 = 4.4819e-04
Loss = 1.2068e-01, PNorm = 75.5232, GNorm = 0.8525, lr_0 = 4.4788e-04
Loss = 1.2603e-01, PNorm = 75.5338, GNorm = 0.9248, lr_0 = 4.4757e-04
Loss = 1.2308e-01, PNorm = 75.5439, GNorm = 0.5712, lr_0 = 4.4727e-04
Loss = 1.3331e-01, PNorm = 75.5596, GNorm = 0.7154, lr_0 = 4.4696e-04
Loss = 1.3908e-01, PNorm = 75.5725, GNorm = 0.8021, lr_0 = 4.4665e-04
Loss = 1.1870e-01, PNorm = 75.5819, GNorm = 0.8738, lr_0 = 4.4635e-04
Loss = 1.2452e-01, PNorm = 75.5880, GNorm = 1.0996, lr_0 = 4.4604e-04
Loss = 1.3519e-01, PNorm = 75.6007, GNorm = 0.7236, lr_0 = 4.4574e-04
Loss = 1.3191e-01, PNorm = 75.6143, GNorm = 0.7973, lr_0 = 4.4543e-04
Loss = 1.4911e-01, PNorm = 75.6349, GNorm = 0.6763, lr_0 = 4.4513e-04
Loss = 1.4086e-01, PNorm = 75.6500, GNorm = 0.4907, lr_0 = 4.4482e-04
Loss = 1.2148e-01, PNorm = 75.6629, GNorm = 0.8710, lr_0 = 4.4452e-04
Loss = 1.3652e-01, PNorm = 75.6773, GNorm = 0.9525, lr_0 = 4.4421e-04
Loss = 1.4172e-01, PNorm = 75.6878, GNorm = 0.7152, lr_0 = 4.4391e-04
Loss = 1.2484e-01, PNorm = 75.6945, GNorm = 0.5684, lr_0 = 4.4360e-04
Loss = 1.2624e-01, PNorm = 75.7003, GNorm = 0.6960, lr_0 = 4.4330e-04
Loss = 1.5889e-01, PNorm = 75.7119, GNorm = 1.0453, lr_0 = 4.4299e-04
Loss = 1.2246e-01, PNorm = 75.7256, GNorm = 0.7012, lr_0 = 4.4269e-04
Loss = 1.3716e-01, PNorm = 75.7392, GNorm = 0.5433, lr_0 = 4.4239e-04
Loss = 1.1849e-01, PNorm = 75.7484, GNorm = 0.6044, lr_0 = 4.4209e-04
Loss = 1.2620e-01, PNorm = 75.7593, GNorm = 0.6883, lr_0 = 4.4178e-04
Loss = 1.2272e-01, PNorm = 75.7741, GNorm = 0.6192, lr_0 = 4.4148e-04
Loss = 1.4015e-01, PNorm = 75.7849, GNorm = 0.9843, lr_0 = 4.4118e-04
Loss = 1.2676e-01, PNorm = 75.7958, GNorm = 0.5830, lr_0 = 4.4088e-04
Loss = 1.4387e-01, PNorm = 75.8065, GNorm = 0.6324, lr_0 = 4.4057e-04
Loss = 1.3164e-01, PNorm = 75.8190, GNorm = 0.6684, lr_0 = 4.4027e-04
Loss = 1.3169e-01, PNorm = 75.8353, GNorm = 0.7795, lr_0 = 4.3997e-04
Loss = 1.2985e-01, PNorm = 75.8456, GNorm = 1.0024, lr_0 = 4.3967e-04
Loss = 1.2741e-01, PNorm = 75.8600, GNorm = 0.6319, lr_0 = 4.3937e-04
Validation mae = 0.237347
Epoch 12
Loss = 1.2053e-01, PNorm = 75.8770, GNorm = 0.8709, lr_0 = 4.3907e-04
Loss = 1.1920e-01, PNorm = 75.8886, GNorm = 0.7146, lr_0 = 4.3877e-04
Loss = 1.1926e-01, PNorm = 75.8995, GNorm = 0.6920, lr_0 = 4.3846e-04
Loss = 1.2109e-01, PNorm = 75.9097, GNorm = 0.8163, lr_0 = 4.3816e-04
Loss = 1.2161e-01, PNorm = 75.9264, GNorm = 0.8415, lr_0 = 4.3786e-04
Loss = 1.2711e-01, PNorm = 75.9466, GNorm = 0.6405, lr_0 = 4.3756e-04
Loss = 1.3106e-01, PNorm = 75.9578, GNorm = 0.7754, lr_0 = 4.3726e-04
Loss = 1.3226e-01, PNorm = 75.9747, GNorm = 0.7417, lr_0 = 4.3696e-04
Loss = 1.4513e-01, PNorm = 75.9848, GNorm = 0.6434, lr_0 = 4.3667e-04
Loss = 1.1760e-01, PNorm = 76.0005, GNorm = 0.6563, lr_0 = 4.3637e-04
Loss = 1.4108e-01, PNorm = 76.0111, GNorm = 0.7741, lr_0 = 4.3607e-04
Loss = 1.2610e-01, PNorm = 76.0267, GNorm = 0.7414, lr_0 = 4.3577e-04
Loss = 1.1017e-01, PNorm = 76.0413, GNorm = 0.9050, lr_0 = 4.3547e-04
Loss = 1.3491e-01, PNorm = 76.0567, GNorm = 0.8746, lr_0 = 4.3517e-04
Loss = 1.0824e-01, PNorm = 76.0682, GNorm = 0.8718, lr_0 = 4.3487e-04
Loss = 1.1507e-01, PNorm = 76.0752, GNorm = 1.1244, lr_0 = 4.3458e-04
Loss = 1.2188e-01, PNorm = 76.0867, GNorm = 0.5242, lr_0 = 4.3428e-04
Loss = 1.2964e-01, PNorm = 76.0959, GNorm = 0.7783, lr_0 = 4.3398e-04
Loss = 1.1458e-01, PNorm = 76.1069, GNorm = 0.6195, lr_0 = 4.3368e-04
Loss = 1.2819e-01, PNorm = 76.1202, GNorm = 0.8625, lr_0 = 4.3339e-04
Loss = 1.1593e-01, PNorm = 76.1339, GNorm = 1.0342, lr_0 = 4.3309e-04
Loss = 1.3215e-01, PNorm = 76.1464, GNorm = 0.6499, lr_0 = 4.3279e-04
Loss = 1.2071e-01, PNorm = 76.1577, GNorm = 0.7813, lr_0 = 4.3250e-04
Loss = 1.0328e-01, PNorm = 76.1734, GNorm = 0.6154, lr_0 = 4.3220e-04
Loss = 1.1235e-01, PNorm = 76.1863, GNorm = 0.7807, lr_0 = 4.3190e-04
Loss = 1.2264e-01, PNorm = 76.1961, GNorm = 0.6632, lr_0 = 4.3161e-04
Loss = 1.0861e-01, PNorm = 76.2063, GNorm = 0.6425, lr_0 = 4.3131e-04
Loss = 1.1693e-01, PNorm = 76.2147, GNorm = 0.5631, lr_0 = 4.3102e-04
Loss = 1.1742e-01, PNorm = 76.2261, GNorm = 0.6417, lr_0 = 4.3072e-04
Loss = 1.2802e-01, PNorm = 76.2363, GNorm = 0.7638, lr_0 = 4.3043e-04
Loss = 1.2198e-01, PNorm = 76.2459, GNorm = 0.5149, lr_0 = 4.3013e-04
Loss = 1.4703e-01, PNorm = 76.2537, GNorm = 1.0465, lr_0 = 4.2984e-04
Loss = 1.2310e-01, PNorm = 76.2638, GNorm = 0.5281, lr_0 = 4.2954e-04
Loss = 1.2617e-01, PNorm = 76.2730, GNorm = 0.6629, lr_0 = 4.2925e-04
Loss = 1.2369e-01, PNorm = 76.2851, GNorm = 0.5810, lr_0 = 4.2895e-04
Loss = 1.3181e-01, PNorm = 76.2887, GNorm = 0.8202, lr_0 = 4.2866e-04
Loss = 1.2517e-01, PNorm = 76.3013, GNorm = 0.6411, lr_0 = 4.2837e-04
Loss = 1.1566e-01, PNorm = 76.3123, GNorm = 0.4886, lr_0 = 4.2807e-04
Loss = 1.2123e-01, PNorm = 76.3218, GNorm = 0.5762, lr_0 = 4.2778e-04
Loss = 1.2093e-01, PNorm = 76.3277, GNorm = 0.6411, lr_0 = 4.2749e-04
Loss = 1.2849e-01, PNorm = 76.3372, GNorm = 0.5083, lr_0 = 4.2719e-04
Loss = 1.1673e-01, PNorm = 76.3501, GNorm = 0.5970, lr_0 = 4.2690e-04
Loss = 1.2017e-01, PNorm = 76.3626, GNorm = 0.8011, lr_0 = 4.2661e-04
Loss = 1.1450e-01, PNorm = 76.3778, GNorm = 0.5533, lr_0 = 4.2632e-04
Loss = 1.0725e-01, PNorm = 76.3852, GNorm = 0.4943, lr_0 = 4.2602e-04
Loss = 1.1738e-01, PNorm = 76.3997, GNorm = 0.7862, lr_0 = 4.2573e-04
Loss = 1.3113e-01, PNorm = 76.4133, GNorm = 0.6853, lr_0 = 4.2544e-04
Loss = 1.1658e-01, PNorm = 76.4261, GNorm = 0.6767, lr_0 = 4.2515e-04
Loss = 1.1322e-01, PNorm = 76.4406, GNorm = 0.6218, lr_0 = 4.2486e-04
Loss = 1.2097e-01, PNorm = 76.4509, GNorm = 0.5124, lr_0 = 4.2457e-04
Loss = 1.2699e-01, PNorm = 76.4609, GNorm = 0.6534, lr_0 = 4.2428e-04
Loss = 1.2924e-01, PNorm = 76.4694, GNorm = 1.1896, lr_0 = 4.2399e-04
Loss = 1.2612e-01, PNorm = 76.4842, GNorm = 0.6414, lr_0 = 4.2370e-04
Loss = 1.1731e-01, PNorm = 76.4946, GNorm = 0.6467, lr_0 = 4.2340e-04
Loss = 1.3099e-01, PNorm = 76.5060, GNorm = 0.5659, lr_0 = 4.2311e-04
Loss = 1.2423e-01, PNorm = 76.5214, GNorm = 0.7393, lr_0 = 4.2283e-04
Loss = 1.0715e-01, PNorm = 76.5383, GNorm = 0.6630, lr_0 = 4.2254e-04
Loss = 1.2407e-01, PNorm = 76.5491, GNorm = 0.5168, lr_0 = 4.2225e-04
Loss = 1.2354e-01, PNorm = 76.5597, GNorm = 0.9253, lr_0 = 4.2196e-04
Loss = 1.1845e-01, PNorm = 76.5659, GNorm = 0.9852, lr_0 = 4.2167e-04
Loss = 1.1152e-01, PNorm = 76.5768, GNorm = 0.7654, lr_0 = 4.2138e-04
Loss = 1.1792e-01, PNorm = 76.5828, GNorm = 0.9735, lr_0 = 4.2109e-04
Loss = 1.3085e-01, PNorm = 76.5933, GNorm = 0.8009, lr_0 = 4.2080e-04
Loss = 1.3415e-01, PNorm = 76.6030, GNorm = 0.9631, lr_0 = 4.2051e-04
Loss = 1.1289e-01, PNorm = 76.6119, GNorm = 0.6725, lr_0 = 4.2023e-04
Loss = 1.1099e-01, PNorm = 76.6266, GNorm = 0.6376, lr_0 = 4.1994e-04
Loss = 1.1419e-01, PNorm = 76.6378, GNorm = 0.8371, lr_0 = 4.1965e-04
Loss = 1.0797e-01, PNorm = 76.6463, GNorm = 0.5224, lr_0 = 4.1936e-04
Loss = 1.1452e-01, PNorm = 76.6570, GNorm = 0.5786, lr_0 = 4.1907e-04
Loss = 1.1715e-01, PNorm = 76.6669, GNorm = 0.7814, lr_0 = 4.1879e-04
Loss = 1.2688e-01, PNorm = 76.6775, GNorm = 0.8333, lr_0 = 4.1850e-04
Loss = 1.3262e-01, PNorm = 76.6916, GNorm = 0.9519, lr_0 = 4.1821e-04
Loss = 1.2337e-01, PNorm = 76.7056, GNorm = 0.6515, lr_0 = 4.1793e-04
Loss = 1.1780e-01, PNorm = 76.7166, GNorm = 0.6491, lr_0 = 4.1764e-04
Loss = 1.2106e-01, PNorm = 76.7281, GNorm = 0.5474, lr_0 = 4.1736e-04
Loss = 1.1103e-01, PNorm = 76.7339, GNorm = 0.8632, lr_0 = 4.1707e-04
Loss = 1.2445e-01, PNorm = 76.7413, GNorm = 1.2061, lr_0 = 4.1678e-04
Loss = 1.2516e-01, PNorm = 76.7561, GNorm = 0.8016, lr_0 = 4.1650e-04
Loss = 1.2623e-01, PNorm = 76.7631, GNorm = 0.5008, lr_0 = 4.1621e-04
Loss = 1.2691e-01, PNorm = 76.7730, GNorm = 0.6351, lr_0 = 4.1593e-04
Loss = 1.2724e-01, PNorm = 76.7801, GNorm = 0.6323, lr_0 = 4.1564e-04
Loss = 1.3611e-01, PNorm = 76.7940, GNorm = 1.3837, lr_0 = 4.1536e-04
Loss = 1.3736e-01, PNorm = 76.8124, GNorm = 0.5989, lr_0 = 4.1507e-04
Loss = 1.4279e-01, PNorm = 76.8253, GNorm = 0.7505, lr_0 = 4.1479e-04
Loss = 1.1731e-01, PNorm = 76.8423, GNorm = 0.6938, lr_0 = 4.1450e-04
Loss = 1.3764e-01, PNorm = 76.8502, GNorm = 0.6452, lr_0 = 4.1422e-04
Loss = 1.1287e-01, PNorm = 76.8599, GNorm = 0.6675, lr_0 = 4.1394e-04
Loss = 1.2364e-01, PNorm = 76.8700, GNorm = 0.6872, lr_0 = 4.1365e-04
Loss = 1.4088e-01, PNorm = 76.8868, GNorm = 0.5938, lr_0 = 4.1337e-04
Loss = 1.2662e-01, PNorm = 76.9007, GNorm = 0.5263, lr_0 = 4.1309e-04
Loss = 1.3317e-01, PNorm = 76.9123, GNorm = 0.8363, lr_0 = 4.1280e-04
Loss = 1.1935e-01, PNorm = 76.9208, GNorm = 0.7517, lr_0 = 4.1252e-04
Loss = 1.1357e-01, PNorm = 76.9311, GNorm = 0.5034, lr_0 = 4.1224e-04
Loss = 1.1528e-01, PNorm = 76.9397, GNorm = 0.6827, lr_0 = 4.1196e-04
Loss = 1.2734e-01, PNorm = 76.9478, GNorm = 0.6040, lr_0 = 4.1167e-04
Loss = 1.5850e-01, PNorm = 76.9564, GNorm = 0.6888, lr_0 = 4.1139e-04
Loss = 1.1599e-01, PNorm = 76.9692, GNorm = 1.2488, lr_0 = 4.1111e-04
Loss = 1.3035e-01, PNorm = 76.9815, GNorm = 0.9649, lr_0 = 4.1083e-04
Loss = 1.2278e-01, PNorm = 76.9930, GNorm = 0.5964, lr_0 = 4.1055e-04
Loss = 1.5164e-01, PNorm = 77.0103, GNorm = 0.9219, lr_0 = 4.1027e-04
Loss = 1.4374e-01, PNorm = 77.0240, GNorm = 0.9064, lr_0 = 4.0998e-04
Loss = 1.0951e-01, PNorm = 77.0392, GNorm = 0.7246, lr_0 = 4.0970e-04
Loss = 1.2435e-01, PNorm = 77.0507, GNorm = 0.6204, lr_0 = 4.0942e-04
Loss = 1.2801e-01, PNorm = 77.0610, GNorm = 0.6037, lr_0 = 4.0914e-04
Loss = 1.1022e-01, PNorm = 77.0666, GNorm = 0.4444, lr_0 = 4.0886e-04
Loss = 1.2436e-01, PNorm = 77.0729, GNorm = 0.8149, lr_0 = 4.0858e-04
Loss = 1.1058e-01, PNorm = 77.0813, GNorm = 0.7676, lr_0 = 4.0830e-04
Loss = 1.2759e-01, PNorm = 77.0911, GNorm = 0.7834, lr_0 = 4.0802e-04
Loss = 1.1438e-01, PNorm = 77.1028, GNorm = 0.6145, lr_0 = 4.0774e-04
Loss = 1.2417e-01, PNorm = 77.1134, GNorm = 0.5602, lr_0 = 4.0746e-04
Loss = 1.3910e-01, PNorm = 77.1206, GNorm = 0.6928, lr_0 = 4.0718e-04
Loss = 1.1319e-01, PNorm = 77.1344, GNorm = 0.5211, lr_0 = 4.0691e-04
Loss = 1.2060e-01, PNorm = 77.1423, GNorm = 0.6105, lr_0 = 4.0663e-04
Loss = 1.2104e-01, PNorm = 77.1517, GNorm = 0.8357, lr_0 = 4.0635e-04
Loss = 1.2891e-01, PNorm = 77.1595, GNorm = 0.7709, lr_0 = 4.0607e-04
Loss = 1.2969e-01, PNorm = 77.1689, GNorm = 0.5085, lr_0 = 4.0579e-04
Loss = 1.2647e-01, PNorm = 77.1811, GNorm = 0.8477, lr_0 = 4.0551e-04
Loss = 1.3528e-01, PNorm = 77.1875, GNorm = 0.7086, lr_0 = 4.0524e-04
Loss = 1.2554e-01, PNorm = 77.2016, GNorm = 0.5519, lr_0 = 4.0496e-04
Loss = 1.2757e-01, PNorm = 77.2115, GNorm = 0.6019, lr_0 = 4.0468e-04
Validation mae = 0.232960
Epoch 13
Loss = 1.1968e-01, PNorm = 77.2238, GNorm = 0.7333, lr_0 = 4.0440e-04
Loss = 1.0889e-01, PNorm = 77.2347, GNorm = 0.5940, lr_0 = 4.0413e-04
Loss = 1.1080e-01, PNorm = 77.2430, GNorm = 0.7679, lr_0 = 4.0385e-04
Loss = 1.1221e-01, PNorm = 77.2535, GNorm = 1.0662, lr_0 = 4.0357e-04
Loss = 1.1524e-01, PNorm = 77.2634, GNorm = 0.9639, lr_0 = 4.0330e-04
Loss = 1.1250e-01, PNorm = 77.2772, GNorm = 0.5225, lr_0 = 4.0302e-04
Loss = 1.0959e-01, PNorm = 77.2913, GNorm = 0.4906, lr_0 = 4.0274e-04
Loss = 1.2368e-01, PNorm = 77.2990, GNorm = 0.5797, lr_0 = 4.0247e-04
Loss = 1.2191e-01, PNorm = 77.3089, GNorm = 0.9215, lr_0 = 4.0219e-04
Loss = 1.1568e-01, PNorm = 77.3185, GNorm = 0.6337, lr_0 = 4.0192e-04
Loss = 1.2295e-01, PNorm = 77.3294, GNorm = 0.7959, lr_0 = 4.0164e-04
Loss = 1.1116e-01, PNorm = 77.3368, GNorm = 0.4315, lr_0 = 4.0137e-04
Loss = 1.1125e-01, PNorm = 77.3509, GNorm = 0.7739, lr_0 = 4.0109e-04
Loss = 1.1961e-01, PNorm = 77.3616, GNorm = 0.6685, lr_0 = 4.0082e-04
Loss = 1.2652e-01, PNorm = 77.3723, GNorm = 0.8886, lr_0 = 4.0054e-04
Loss = 1.2355e-01, PNorm = 77.3862, GNorm = 0.6533, lr_0 = 4.0027e-04
Loss = 1.0307e-01, PNorm = 77.3985, GNorm = 0.7084, lr_0 = 3.9999e-04
Loss = 1.0284e-01, PNorm = 77.4102, GNorm = 0.9454, lr_0 = 3.9972e-04
Loss = 1.1390e-01, PNorm = 77.4155, GNorm = 0.7840, lr_0 = 3.9945e-04
Loss = 1.3355e-01, PNorm = 77.4304, GNorm = 0.6745, lr_0 = 3.9917e-04
Loss = 1.1149e-01, PNorm = 77.4390, GNorm = 0.9026, lr_0 = 3.9890e-04
Loss = 1.2068e-01, PNorm = 77.4505, GNorm = 0.9463, lr_0 = 3.9863e-04
Loss = 1.0470e-01, PNorm = 77.4609, GNorm = 1.0447, lr_0 = 3.9835e-04
Loss = 1.0767e-01, PNorm = 77.4646, GNorm = 0.6394, lr_0 = 3.9808e-04
Loss = 1.0403e-01, PNorm = 77.4730, GNorm = 0.6723, lr_0 = 3.9781e-04
Loss = 1.0932e-01, PNorm = 77.4812, GNorm = 0.5033, lr_0 = 3.9753e-04
Loss = 1.1976e-01, PNorm = 77.4908, GNorm = 1.0672, lr_0 = 3.9726e-04
Loss = 1.1679e-01, PNorm = 77.5038, GNorm = 0.8105, lr_0 = 3.9699e-04
Loss = 1.0859e-01, PNorm = 77.5143, GNorm = 0.6954, lr_0 = 3.9672e-04
Loss = 1.1745e-01, PNorm = 77.5262, GNorm = 0.6166, lr_0 = 3.9645e-04
Loss = 1.1994e-01, PNorm = 77.5376, GNorm = 0.7162, lr_0 = 3.9617e-04
Loss = 9.8596e-02, PNorm = 77.5479, GNorm = 0.7007, lr_0 = 3.9590e-04
Loss = 1.0964e-01, PNorm = 77.5555, GNorm = 0.4695, lr_0 = 3.9563e-04
Loss = 1.0578e-01, PNorm = 77.5623, GNorm = 0.7002, lr_0 = 3.9536e-04
Loss = 1.1297e-01, PNorm = 77.5716, GNorm = 0.9048, lr_0 = 3.9509e-04
Loss = 1.2840e-01, PNorm = 77.5823, GNorm = 0.6609, lr_0 = 3.9482e-04
Loss = 1.1203e-01, PNorm = 77.5899, GNorm = 0.5380, lr_0 = 3.9455e-04
Loss = 1.0751e-01, PNorm = 77.5964, GNorm = 0.7013, lr_0 = 3.9428e-04
Loss = 1.1272e-01, PNorm = 77.6043, GNorm = 0.5609, lr_0 = 3.9401e-04
Loss = 1.1820e-01, PNorm = 77.6128, GNorm = 0.8402, lr_0 = 3.9374e-04
Loss = 1.3114e-01, PNorm = 77.6275, GNorm = 2.1714, lr_0 = 3.9347e-04
Loss = 1.3024e-01, PNorm = 77.6414, GNorm = 0.5380, lr_0 = 3.9320e-04
Loss = 1.1531e-01, PNorm = 77.6544, GNorm = 0.6933, lr_0 = 3.9293e-04
Loss = 1.0972e-01, PNorm = 77.6643, GNorm = 0.9488, lr_0 = 3.9266e-04
Loss = 1.3362e-01, PNorm = 77.6706, GNorm = 0.7145, lr_0 = 3.9239e-04
Loss = 1.2850e-01, PNorm = 77.6834, GNorm = 0.7138, lr_0 = 3.9212e-04
Loss = 1.0673e-01, PNorm = 77.6859, GNorm = 0.6270, lr_0 = 3.9185e-04
Loss = 1.1307e-01, PNorm = 77.6896, GNorm = 0.5055, lr_0 = 3.9159e-04
Loss = 1.1621e-01, PNorm = 77.6986, GNorm = 0.7741, lr_0 = 3.9132e-04
Loss = 1.4618e-01, PNorm = 77.7064, GNorm = 0.6617, lr_0 = 3.9105e-04
Loss = 1.1543e-01, PNorm = 77.7204, GNorm = 0.8300, lr_0 = 3.9078e-04
Loss = 1.2350e-01, PNorm = 77.7332, GNorm = 0.7519, lr_0 = 3.9051e-04
Loss = 1.2566e-01, PNorm = 77.7433, GNorm = 0.5221, lr_0 = 3.9025e-04
Loss = 1.1055e-01, PNorm = 77.7510, GNorm = 0.6515, lr_0 = 3.8998e-04
Loss = 1.0718e-01, PNorm = 77.7543, GNorm = 0.6326, lr_0 = 3.8971e-04
Loss = 1.0752e-01, PNorm = 77.7660, GNorm = 0.5138, lr_0 = 3.8945e-04
Loss = 9.2469e-02, PNorm = 77.7762, GNorm = 0.5621, lr_0 = 3.8918e-04
Loss = 1.1234e-01, PNorm = 77.7848, GNorm = 0.6637, lr_0 = 3.8891e-04
Loss = 1.1455e-01, PNorm = 77.7937, GNorm = 1.1342, lr_0 = 3.8865e-04
Loss = 1.1291e-01, PNorm = 77.7995, GNorm = 0.8671, lr_0 = 3.8838e-04
Loss = 1.1683e-01, PNorm = 77.8120, GNorm = 0.9088, lr_0 = 3.8811e-04
Loss = 1.1181e-01, PNorm = 77.8206, GNorm = 0.7626, lr_0 = 3.8785e-04
Loss = 1.2131e-01, PNorm = 77.8311, GNorm = 0.5036, lr_0 = 3.8758e-04
Loss = 1.1458e-01, PNorm = 77.8392, GNorm = 0.7145, lr_0 = 3.8732e-04
Loss = 1.2417e-01, PNorm = 77.8472, GNorm = 0.6574, lr_0 = 3.8705e-04
Loss = 1.1172e-01, PNorm = 77.8553, GNorm = 0.5169, lr_0 = 3.8679e-04
Loss = 1.1916e-01, PNorm = 77.8631, GNorm = 0.8241, lr_0 = 3.8652e-04
Loss = 1.2765e-01, PNorm = 77.8754, GNorm = 0.6822, lr_0 = 3.8626e-04
Loss = 1.1637e-01, PNorm = 77.8852, GNorm = 0.7405, lr_0 = 3.8599e-04
Loss = 1.1195e-01, PNorm = 77.8949, GNorm = 0.6011, lr_0 = 3.8573e-04
Loss = 1.3164e-01, PNorm = 77.9037, GNorm = 0.5850, lr_0 = 3.8546e-04
Loss = 1.2630e-01, PNorm = 77.9126, GNorm = 0.7208, lr_0 = 3.8520e-04
Loss = 1.1049e-01, PNorm = 77.9219, GNorm = 0.6540, lr_0 = 3.8493e-04
Loss = 1.1232e-01, PNorm = 77.9287, GNorm = 0.7310, lr_0 = 3.8467e-04
Loss = 1.1707e-01, PNorm = 77.9351, GNorm = 0.6658, lr_0 = 3.8441e-04
Loss = 1.2016e-01, PNorm = 77.9419, GNorm = 0.5744, lr_0 = 3.8414e-04
Loss = 1.2535e-01, PNorm = 77.9503, GNorm = 0.6809, lr_0 = 3.8388e-04
Loss = 1.1870e-01, PNorm = 77.9646, GNorm = 0.4891, lr_0 = 3.8362e-04
Loss = 1.2557e-01, PNorm = 77.9736, GNorm = 0.6985, lr_0 = 3.8336e-04
Loss = 9.8988e-02, PNorm = 77.9901, GNorm = 0.5544, lr_0 = 3.8309e-04
Loss = 1.2449e-01, PNorm = 78.0009, GNorm = 0.5999, lr_0 = 3.8283e-04
Loss = 1.0972e-01, PNorm = 78.0096, GNorm = 0.6653, lr_0 = 3.8257e-04
Loss = 1.2760e-01, PNorm = 78.0162, GNorm = 0.9099, lr_0 = 3.8231e-04
Loss = 1.2811e-01, PNorm = 78.0191, GNorm = 0.5952, lr_0 = 3.8204e-04
Loss = 1.0014e-01, PNorm = 78.0297, GNorm = 0.4691, lr_0 = 3.8178e-04
Loss = 1.2635e-01, PNorm = 78.0439, GNorm = 0.5905, lr_0 = 3.8152e-04
Loss = 1.1576e-01, PNorm = 78.0573, GNorm = 0.6658, lr_0 = 3.8126e-04
Loss = 1.1404e-01, PNorm = 78.0704, GNorm = 0.7633, lr_0 = 3.8100e-04
Loss = 1.1542e-01, PNorm = 78.0814, GNorm = 0.7427, lr_0 = 3.8074e-04
Loss = 1.2344e-01, PNorm = 78.0906, GNorm = 1.0186, lr_0 = 3.8048e-04
Loss = 1.2151e-01, PNorm = 78.0987, GNorm = 0.5544, lr_0 = 3.8022e-04
Loss = 1.0535e-01, PNorm = 78.1103, GNorm = 0.6103, lr_0 = 3.7995e-04
Loss = 1.0028e-01, PNorm = 78.1239, GNorm = 0.6162, lr_0 = 3.7969e-04
Loss = 1.2232e-01, PNorm = 78.1336, GNorm = 0.5808, lr_0 = 3.7943e-04
Loss = 1.1668e-01, PNorm = 78.1434, GNorm = 0.8069, lr_0 = 3.7917e-04
Loss = 1.0907e-01, PNorm = 78.1499, GNorm = 0.6926, lr_0 = 3.7891e-04
Loss = 1.1685e-01, PNorm = 78.1559, GNorm = 0.6229, lr_0 = 3.7866e-04
Loss = 1.1292e-01, PNorm = 78.1679, GNorm = 0.6326, lr_0 = 3.7840e-04
Loss = 1.1086e-01, PNorm = 78.1698, GNorm = 0.5123, lr_0 = 3.7814e-04
Loss = 1.2287e-01, PNorm = 78.1793, GNorm = 0.5619, lr_0 = 3.7788e-04
Loss = 1.2476e-01, PNorm = 78.1856, GNorm = 0.6433, lr_0 = 3.7762e-04
Loss = 1.3658e-01, PNorm = 78.1939, GNorm = 0.7760, lr_0 = 3.7736e-04
Loss = 1.5530e-01, PNorm = 78.2063, GNorm = 0.7126, lr_0 = 3.7710e-04
Loss = 1.3199e-01, PNorm = 78.2170, GNorm = 0.6899, lr_0 = 3.7684e-04
Loss = 1.2855e-01, PNorm = 78.2249, GNorm = 1.2449, lr_0 = 3.7659e-04
Loss = 1.2819e-01, PNorm = 78.2374, GNorm = 0.6868, lr_0 = 3.7633e-04
Loss = 1.0781e-01, PNorm = 78.2509, GNorm = 0.7008, lr_0 = 3.7607e-04
Loss = 1.3068e-01, PNorm = 78.2620, GNorm = 0.9760, lr_0 = 3.7581e-04
Loss = 1.3327e-01, PNorm = 78.2782, GNorm = 0.6914, lr_0 = 3.7555e-04
Loss = 1.3334e-01, PNorm = 78.2884, GNorm = 0.7218, lr_0 = 3.7530e-04
Loss = 1.2172e-01, PNorm = 78.2961, GNorm = 0.9356, lr_0 = 3.7504e-04
Loss = 1.2323e-01, PNorm = 78.2969, GNorm = 0.5824, lr_0 = 3.7478e-04
Loss = 1.2699e-01, PNorm = 78.3055, GNorm = 0.7912, lr_0 = 3.7453e-04
Loss = 1.3261e-01, PNorm = 78.3151, GNorm = 0.7943, lr_0 = 3.7427e-04
Loss = 1.1884e-01, PNorm = 78.3270, GNorm = 0.5173, lr_0 = 3.7401e-04
Loss = 1.2572e-01, PNorm = 78.3343, GNorm = 0.6148, lr_0 = 3.7376e-04
Loss = 1.1108e-01, PNorm = 78.3445, GNorm = 1.0043, lr_0 = 3.7350e-04
Loss = 1.1490e-01, PNorm = 78.3537, GNorm = 0.6404, lr_0 = 3.7325e-04
Loss = 1.2823e-01, PNorm = 78.3660, GNorm = 1.0551, lr_0 = 3.7299e-04
Loss = 1.0962e-01, PNorm = 78.3806, GNorm = 0.8311, lr_0 = 3.7273e-04
Validation mae = 0.231914
Epoch 14
Loss = 1.0489e-01, PNorm = 78.3958, GNorm = 0.8633, lr_0 = 3.7248e-04
Loss = 1.0540e-01, PNorm = 78.4067, GNorm = 0.5245, lr_0 = 3.7222e-04
Loss = 1.1102e-01, PNorm = 78.4208, GNorm = 0.8956, lr_0 = 3.7197e-04
Loss = 9.8475e-02, PNorm = 78.4311, GNorm = 0.7733, lr_0 = 3.7171e-04
Loss = 1.2067e-01, PNorm = 78.4417, GNorm = 0.7950, lr_0 = 3.7146e-04
Loss = 1.2301e-01, PNorm = 78.4501, GNorm = 0.8047, lr_0 = 3.7120e-04
Loss = 1.0717e-01, PNorm = 78.4595, GNorm = 0.6401, lr_0 = 3.7095e-04
Loss = 1.0766e-01, PNorm = 78.4692, GNorm = 1.0267, lr_0 = 3.7070e-04
Loss = 1.1682e-01, PNorm = 78.4751, GNorm = 0.6315, lr_0 = 3.7044e-04
Loss = 1.1128e-01, PNorm = 78.4896, GNorm = 0.6561, lr_0 = 3.7019e-04
Loss = 1.0044e-01, PNorm = 78.5025, GNorm = 0.6618, lr_0 = 3.6993e-04
Loss = 9.7960e-02, PNorm = 78.5145, GNorm = 0.7678, lr_0 = 3.6968e-04
Loss = 1.1093e-01, PNorm = 78.5226, GNorm = 0.8932, lr_0 = 3.6943e-04
Loss = 1.1518e-01, PNorm = 78.5290, GNorm = 0.5880, lr_0 = 3.6917e-04
Loss = 1.0732e-01, PNorm = 78.5357, GNorm = 1.0159, lr_0 = 3.6892e-04
Loss = 1.0654e-01, PNorm = 78.5420, GNorm = 0.5656, lr_0 = 3.6867e-04
Loss = 1.2059e-01, PNorm = 78.5503, GNorm = 0.6813, lr_0 = 3.6842e-04
Loss = 9.5786e-02, PNorm = 78.5583, GNorm = 0.5036, lr_0 = 3.6816e-04
Loss = 1.0244e-01, PNorm = 78.5671, GNorm = 0.6358, lr_0 = 3.6791e-04
Loss = 1.1604e-01, PNorm = 78.5760, GNorm = 0.6837, lr_0 = 3.6766e-04
Loss = 1.0219e-01, PNorm = 78.5861, GNorm = 0.5257, lr_0 = 3.6741e-04
Loss = 1.0542e-01, PNorm = 78.5942, GNorm = 0.6497, lr_0 = 3.6716e-04
Loss = 1.2659e-01, PNorm = 78.6020, GNorm = 0.5549, lr_0 = 3.6690e-04
Loss = 1.1645e-01, PNorm = 78.6143, GNorm = 0.5950, lr_0 = 3.6665e-04
Loss = 1.1196e-01, PNorm = 78.6231, GNorm = 1.0084, lr_0 = 3.6640e-04
Loss = 1.2384e-01, PNorm = 78.6314, GNorm = 0.6312, lr_0 = 3.6615e-04
Loss = 1.0857e-01, PNorm = 78.6368, GNorm = 0.7220, lr_0 = 3.6590e-04
Loss = 9.6869e-02, PNorm = 78.6415, GNorm = 1.3152, lr_0 = 3.6565e-04
Loss = 1.1318e-01, PNorm = 78.6497, GNorm = 0.7300, lr_0 = 3.6540e-04
Loss = 1.2233e-01, PNorm = 78.6551, GNorm = 0.5422, lr_0 = 3.6515e-04
Loss = 1.1816e-01, PNorm = 78.6604, GNorm = 0.7958, lr_0 = 3.6490e-04
Loss = 1.0424e-01, PNorm = 78.6675, GNorm = 0.5428, lr_0 = 3.6465e-04
Loss = 1.0766e-01, PNorm = 78.6751, GNorm = 0.7191, lr_0 = 3.6440e-04
Loss = 1.0609e-01, PNorm = 78.6825, GNorm = 0.9758, lr_0 = 3.6415e-04
Loss = 1.1394e-01, PNorm = 78.6930, GNorm = 0.5042, lr_0 = 3.6390e-04
Loss = 1.0552e-01, PNorm = 78.7044, GNorm = 0.6270, lr_0 = 3.6365e-04
Loss = 1.1099e-01, PNorm = 78.7117, GNorm = 0.5849, lr_0 = 3.6340e-04
Loss = 1.0669e-01, PNorm = 78.7198, GNorm = 0.6672, lr_0 = 3.6315e-04
Loss = 1.1512e-01, PNorm = 78.7244, GNorm = 0.7246, lr_0 = 3.6290e-04
Loss = 1.1271e-01, PNorm = 78.7363, GNorm = 1.0827, lr_0 = 3.6266e-04
Loss = 1.2240e-01, PNorm = 78.7455, GNorm = 0.7671, lr_0 = 3.6241e-04
Loss = 1.2100e-01, PNorm = 78.7522, GNorm = 0.5674, lr_0 = 3.6216e-04
Loss = 1.0772e-01, PNorm = 78.7625, GNorm = 0.6627, lr_0 = 3.6191e-04
Loss = 1.2031e-01, PNorm = 78.7707, GNorm = 0.8358, lr_0 = 3.6166e-04
Loss = 1.2022e-01, PNorm = 78.7790, GNorm = 0.7807, lr_0 = 3.6141e-04
Loss = 1.0784e-01, PNorm = 78.7896, GNorm = 0.5525, lr_0 = 3.6117e-04
Loss = 1.1032e-01, PNorm = 78.8015, GNorm = 0.6435, lr_0 = 3.6092e-04
Loss = 1.0022e-01, PNorm = 78.8116, GNorm = 0.6286, lr_0 = 3.6067e-04
Loss = 1.0167e-01, PNorm = 78.8170, GNorm = 0.6025, lr_0 = 3.6043e-04
Loss = 1.1001e-01, PNorm = 78.8183, GNorm = 0.5252, lr_0 = 3.6018e-04
Loss = 1.1265e-01, PNorm = 78.8235, GNorm = 0.4287, lr_0 = 3.5993e-04
Loss = 1.0804e-01, PNorm = 78.8302, GNorm = 1.1439, lr_0 = 3.5969e-04
Loss = 1.1432e-01, PNorm = 78.8396, GNorm = 0.8591, lr_0 = 3.5944e-04
Loss = 1.0448e-01, PNorm = 78.8450, GNorm = 0.5454, lr_0 = 3.5919e-04
Loss = 1.1393e-01, PNorm = 78.8528, GNorm = 0.5588, lr_0 = 3.5895e-04
Loss = 1.1470e-01, PNorm = 78.8652, GNorm = 0.5601, lr_0 = 3.5870e-04
Loss = 1.0941e-01, PNorm = 78.8769, GNorm = 0.7080, lr_0 = 3.5845e-04
Loss = 1.1272e-01, PNorm = 78.8910, GNorm = 0.5654, lr_0 = 3.5821e-04
Loss = 1.1794e-01, PNorm = 78.8986, GNorm = 0.6680, lr_0 = 3.5796e-04
Loss = 1.0583e-01, PNorm = 78.9075, GNorm = 0.6850, lr_0 = 3.5772e-04
Loss = 1.1028e-01, PNorm = 78.9191, GNorm = 0.7163, lr_0 = 3.5747e-04
Loss = 1.1311e-01, PNorm = 78.9297, GNorm = 0.7193, lr_0 = 3.5723e-04
Loss = 1.0704e-01, PNorm = 78.9358, GNorm = 0.5031, lr_0 = 3.5698e-04
Loss = 1.0807e-01, PNorm = 78.9367, GNorm = 0.6265, lr_0 = 3.5674e-04
Loss = 1.1519e-01, PNorm = 78.9446, GNorm = 0.5943, lr_0 = 3.5650e-04
Loss = 1.3228e-01, PNorm = 78.9563, GNorm = 0.6529, lr_0 = 3.5625e-04
Loss = 1.0764e-01, PNorm = 78.9667, GNorm = 0.6073, lr_0 = 3.5601e-04
Loss = 1.0889e-01, PNorm = 78.9760, GNorm = 0.5743, lr_0 = 3.5576e-04
Loss = 1.2496e-01, PNorm = 78.9800, GNorm = 0.6330, lr_0 = 3.5552e-04
Loss = 1.2168e-01, PNorm = 78.9888, GNorm = 0.7201, lr_0 = 3.5528e-04
Loss = 1.1672e-01, PNorm = 78.9995, GNorm = 0.6355, lr_0 = 3.5503e-04
Loss = 1.0378e-01, PNorm = 79.0052, GNorm = 1.3422, lr_0 = 3.5479e-04
Loss = 1.2720e-01, PNorm = 79.0117, GNorm = 0.6106, lr_0 = 3.5455e-04
Loss = 1.0046e-01, PNorm = 79.0186, GNorm = 0.8069, lr_0 = 3.5430e-04
Loss = 9.7014e-02, PNorm = 79.0293, GNorm = 0.5441, lr_0 = 3.5406e-04
Loss = 1.2422e-01, PNorm = 79.0406, GNorm = 1.1580, lr_0 = 3.5382e-04
Loss = 1.0806e-01, PNorm = 79.0490, GNorm = 0.5096, lr_0 = 3.5358e-04
Loss = 1.1567e-01, PNorm = 79.0567, GNorm = 0.5400, lr_0 = 3.5333e-04
Loss = 1.1281e-01, PNorm = 79.0696, GNorm = 0.5766, lr_0 = 3.5309e-04
Loss = 1.1055e-01, PNorm = 79.0776, GNorm = 0.4774, lr_0 = 3.5285e-04
Loss = 1.2535e-01, PNorm = 79.0854, GNorm = 0.5328, lr_0 = 3.5261e-04
Loss = 9.8875e-02, PNorm = 79.0935, GNorm = 0.6944, lr_0 = 3.5237e-04
Loss = 1.0758e-01, PNorm = 79.0991, GNorm = 0.6890, lr_0 = 3.5212e-04
Loss = 1.1738e-01, PNorm = 79.1048, GNorm = 0.5541, lr_0 = 3.5188e-04
Loss = 1.2585e-01, PNorm = 79.1070, GNorm = 0.7224, lr_0 = 3.5164e-04
Loss = 1.1874e-01, PNorm = 79.1127, GNorm = 1.0538, lr_0 = 3.5140e-04
Loss = 1.1096e-01, PNorm = 79.1262, GNorm = 0.5370, lr_0 = 3.5116e-04
Loss = 1.0813e-01, PNorm = 79.1362, GNorm = 0.6960, lr_0 = 3.5092e-04
Loss = 1.2099e-01, PNorm = 79.1439, GNorm = 0.5408, lr_0 = 3.5068e-04
Loss = 1.0937e-01, PNorm = 79.1498, GNorm = 0.8374, lr_0 = 3.5044e-04
Loss = 1.2407e-01, PNorm = 79.1514, GNorm = 0.6376, lr_0 = 3.5020e-04
Loss = 1.1050e-01, PNorm = 79.1578, GNorm = 0.5692, lr_0 = 3.4996e-04
Loss = 1.2513e-01, PNorm = 79.1658, GNorm = 0.7520, lr_0 = 3.4972e-04
Loss = 1.1333e-01, PNorm = 79.1775, GNorm = 0.7047, lr_0 = 3.4948e-04
Loss = 1.2466e-01, PNorm = 79.1918, GNorm = 0.6820, lr_0 = 3.4924e-04
Loss = 1.0514e-01, PNorm = 79.2008, GNorm = 0.4728, lr_0 = 3.4900e-04
Loss = 1.1349e-01, PNorm = 79.2023, GNorm = 0.7557, lr_0 = 3.4876e-04
Loss = 1.1123e-01, PNorm = 79.2135, GNorm = 0.6406, lr_0 = 3.4852e-04
Loss = 1.0642e-01, PNorm = 79.2277, GNorm = 0.6176, lr_0 = 3.4828e-04
Loss = 1.1349e-01, PNorm = 79.2387, GNorm = 0.9257, lr_0 = 3.4805e-04
Loss = 1.1936e-01, PNorm = 79.2489, GNorm = 0.7604, lr_0 = 3.4781e-04
Loss = 1.1652e-01, PNorm = 79.2575, GNorm = 0.6422, lr_0 = 3.4757e-04
Loss = 1.1727e-01, PNorm = 79.2692, GNorm = 0.5118, lr_0 = 3.4733e-04
Loss = 1.1870e-01, PNorm = 79.2750, GNorm = 0.7294, lr_0 = 3.4709e-04
Loss = 1.1641e-01, PNorm = 79.2816, GNorm = 0.8349, lr_0 = 3.4686e-04
Loss = 1.3191e-01, PNorm = 79.2911, GNorm = 0.5423, lr_0 = 3.4662e-04
Loss = 1.0560e-01, PNorm = 79.3009, GNorm = 0.5989, lr_0 = 3.4638e-04
Loss = 1.2152e-01, PNorm = 79.3109, GNorm = 0.6539, lr_0 = 3.4614e-04
Loss = 1.0686e-01, PNorm = 79.3188, GNorm = 0.5854, lr_0 = 3.4591e-04
Loss = 9.7640e-02, PNorm = 79.3283, GNorm = 0.5456, lr_0 = 3.4567e-04
Loss = 1.2117e-01, PNorm = 79.3309, GNorm = 0.6395, lr_0 = 3.4543e-04
Loss = 1.1055e-01, PNorm = 79.3369, GNorm = 0.6756, lr_0 = 3.4520e-04
Loss = 9.6133e-02, PNorm = 79.3434, GNorm = 0.5073, lr_0 = 3.4496e-04
Loss = 1.3393e-01, PNorm = 79.3501, GNorm = 0.8358, lr_0 = 3.4472e-04
Loss = 1.2012e-01, PNorm = 79.3589, GNorm = 0.6648, lr_0 = 3.4449e-04
Loss = 1.0551e-01, PNorm = 79.3695, GNorm = 0.9088, lr_0 = 3.4425e-04
Loss = 1.0684e-01, PNorm = 79.3848, GNorm = 0.5861, lr_0 = 3.4402e-04
Loss = 1.1629e-01, PNorm = 79.3914, GNorm = 0.6042, lr_0 = 3.4378e-04
Loss = 1.1156e-01, PNorm = 79.3944, GNorm = 0.5771, lr_0 = 3.4354e-04
Loss = 1.3424e-01, PNorm = 79.4008, GNorm = 0.6431, lr_0 = 3.4331e-04
Validation mae = 0.233271
Epoch 15
Loss = 1.0829e-01, PNorm = 79.4136, GNorm = 0.8095, lr_0 = 3.4307e-04
Loss = 9.1729e-02, PNorm = 79.4255, GNorm = 0.9980, lr_0 = 3.4284e-04
Loss = 1.0810e-01, PNorm = 79.4334, GNorm = 0.5965, lr_0 = 3.4260e-04
Loss = 1.0748e-01, PNorm = 79.4410, GNorm = 0.9993, lr_0 = 3.4237e-04
Loss = 9.5933e-02, PNorm = 79.4445, GNorm = 0.7560, lr_0 = 3.4213e-04
Loss = 9.2823e-02, PNorm = 79.4505, GNorm = 0.5137, lr_0 = 3.4190e-04
Loss = 9.6800e-02, PNorm = 79.4562, GNorm = 0.7212, lr_0 = 3.4167e-04
Loss = 9.6156e-02, PNorm = 79.4620, GNorm = 0.4296, lr_0 = 3.4143e-04
Loss = 1.0137e-01, PNorm = 79.4682, GNorm = 0.5282, lr_0 = 3.4120e-04
Loss = 1.0454e-01, PNorm = 79.4750, GNorm = 0.7273, lr_0 = 3.4096e-04
Loss = 1.0696e-01, PNorm = 79.4823, GNorm = 0.6981, lr_0 = 3.4073e-04
Loss = 1.0118e-01, PNorm = 79.4889, GNorm = 0.5523, lr_0 = 3.4050e-04
Loss = 1.1745e-01, PNorm = 79.5013, GNorm = 0.7786, lr_0 = 3.4026e-04
Loss = 1.0486e-01, PNorm = 79.5098, GNorm = 0.5094, lr_0 = 3.4003e-04
Loss = 1.0661e-01, PNorm = 79.5177, GNorm = 0.7344, lr_0 = 3.3980e-04
Loss = 1.1614e-01, PNorm = 79.5286, GNorm = 0.4596, lr_0 = 3.3956e-04
Loss = 1.0787e-01, PNorm = 79.5353, GNorm = 0.6587, lr_0 = 3.3933e-04
Loss = 1.0215e-01, PNorm = 79.5450, GNorm = 0.6006, lr_0 = 3.3910e-04
Loss = 1.0412e-01, PNorm = 79.5544, GNorm = 0.7766, lr_0 = 3.3887e-04
Loss = 1.2131e-01, PNorm = 79.5686, GNorm = 0.9984, lr_0 = 3.3864e-04
Loss = 1.0484e-01, PNorm = 79.5818, GNorm = 0.8207, lr_0 = 3.3840e-04
Loss = 1.1129e-01, PNorm = 79.5983, GNorm = 0.6386, lr_0 = 3.3817e-04
Loss = 1.0161e-01, PNorm = 79.6075, GNorm = 0.8094, lr_0 = 3.3794e-04
Loss = 1.0015e-01, PNorm = 79.6089, GNorm = 0.6173, lr_0 = 3.3771e-04
Loss = 9.9159e-02, PNorm = 79.6152, GNorm = 0.7714, lr_0 = 3.3748e-04
Loss = 1.1560e-01, PNorm = 79.6168, GNorm = 0.6819, lr_0 = 3.3725e-04
Loss = 1.0526e-01, PNorm = 79.6220, GNorm = 0.7826, lr_0 = 3.3701e-04
Loss = 1.0373e-01, PNorm = 79.6280, GNorm = 0.5609, lr_0 = 3.3678e-04
Loss = 9.7758e-02, PNorm = 79.6340, GNorm = 0.6366, lr_0 = 3.3655e-04
Loss = 8.9600e-02, PNorm = 79.6402, GNorm = 0.6660, lr_0 = 3.3632e-04
Loss = 1.1823e-01, PNorm = 79.6442, GNorm = 0.6036, lr_0 = 3.3609e-04
Loss = 1.1393e-01, PNorm = 79.6507, GNorm = 0.6121, lr_0 = 3.3586e-04
Loss = 1.0694e-01, PNorm = 79.6611, GNorm = 0.7348, lr_0 = 3.3563e-04
Loss = 1.1330e-01, PNorm = 79.6709, GNorm = 0.7330, lr_0 = 3.3540e-04
Loss = 1.1415e-01, PNorm = 79.6824, GNorm = 0.6721, lr_0 = 3.3517e-04
Loss = 1.0148e-01, PNorm = 79.6869, GNorm = 0.7763, lr_0 = 3.3494e-04
Loss = 1.1230e-01, PNorm = 79.6984, GNorm = 0.6326, lr_0 = 3.3471e-04
Loss = 1.1183e-01, PNorm = 79.7105, GNorm = 0.6927, lr_0 = 3.3448e-04
Loss = 1.0477e-01, PNorm = 79.7178, GNorm = 0.6390, lr_0 = 3.3425e-04
Loss = 1.1654e-01, PNorm = 79.7225, GNorm = 0.8689, lr_0 = 3.3403e-04
Loss = 1.1459e-01, PNorm = 79.7316, GNorm = 0.5538, lr_0 = 3.3380e-04
Loss = 1.1285e-01, PNorm = 79.7354, GNorm = 0.5854, lr_0 = 3.3357e-04
Loss = 9.9121e-02, PNorm = 79.7406, GNorm = 0.6949, lr_0 = 3.3334e-04
Loss = 9.9466e-02, PNorm = 79.7485, GNorm = 0.5425, lr_0 = 3.3311e-04
Loss = 1.1343e-01, PNorm = 79.7560, GNorm = 0.7734, lr_0 = 3.3288e-04
Loss = 1.0369e-01, PNorm = 79.7610, GNorm = 0.9101, lr_0 = 3.3265e-04
Loss = 1.2024e-01, PNorm = 79.7709, GNorm = 0.9514, lr_0 = 3.3243e-04
Loss = 1.1357e-01, PNorm = 79.7801, GNorm = 1.0627, lr_0 = 3.3220e-04
Loss = 1.0128e-01, PNorm = 79.7867, GNorm = 1.0921, lr_0 = 3.3197e-04
Loss = 1.0835e-01, PNorm = 79.7986, GNorm = 0.5816, lr_0 = 3.3174e-04
Loss = 1.1307e-01, PNorm = 79.8059, GNorm = 0.6224, lr_0 = 3.3152e-04
Loss = 1.2113e-01, PNorm = 79.8155, GNorm = 2.0753, lr_0 = 3.3129e-04
Loss = 1.0424e-01, PNorm = 79.8222, GNorm = 0.7327, lr_0 = 3.3106e-04
Loss = 1.1376e-01, PNorm = 79.8274, GNorm = 0.6127, lr_0 = 3.3084e-04
Loss = 1.0812e-01, PNorm = 79.8331, GNorm = 0.6467, lr_0 = 3.3061e-04
Loss = 1.0702e-01, PNorm = 79.8399, GNorm = 0.6163, lr_0 = 3.3038e-04
Loss = 1.1113e-01, PNorm = 79.8474, GNorm = 0.7390, lr_0 = 3.3016e-04
Loss = 9.7588e-02, PNorm = 79.8562, GNorm = 0.6935, lr_0 = 3.2993e-04
Loss = 1.0728e-01, PNorm = 79.8670, GNorm = 0.5302, lr_0 = 3.2970e-04
Loss = 1.0833e-01, PNorm = 79.8764, GNorm = 0.5692, lr_0 = 3.2948e-04
Loss = 1.0889e-01, PNorm = 79.8807, GNorm = 0.7399, lr_0 = 3.2925e-04
Loss = 1.2203e-01, PNorm = 79.8874, GNorm = 0.7886, lr_0 = 3.2903e-04
Loss = 1.0847e-01, PNorm = 79.8972, GNorm = 0.6879, lr_0 = 3.2880e-04
Loss = 9.4636e-02, PNorm = 79.9005, GNorm = 0.5522, lr_0 = 3.2858e-04
Loss = 1.2627e-01, PNorm = 79.8997, GNorm = 0.6371, lr_0 = 3.2835e-04
Loss = 1.2602e-01, PNorm = 79.9082, GNorm = 0.7760, lr_0 = 3.2813e-04
Loss = 1.0645e-01, PNorm = 79.9198, GNorm = 0.6451, lr_0 = 3.2790e-04
Loss = 1.1926e-01, PNorm = 79.9280, GNorm = 1.1242, lr_0 = 3.2768e-04
Loss = 1.2057e-01, PNorm = 79.9344, GNorm = 0.6493, lr_0 = 3.2745e-04
Loss = 1.0549e-01, PNorm = 79.9438, GNorm = 0.4627, lr_0 = 3.2723e-04
Loss = 1.2079e-01, PNorm = 79.9524, GNorm = 0.7345, lr_0 = 3.2700e-04
Loss = 1.2478e-01, PNorm = 79.9574, GNorm = 0.7397, lr_0 = 3.2678e-04
Loss = 1.1833e-01, PNorm = 79.9706, GNorm = 0.8197, lr_0 = 3.2656e-04
Loss = 1.0805e-01, PNorm = 79.9739, GNorm = 0.5827, lr_0 = 3.2633e-04
Loss = 1.1305e-01, PNorm = 79.9809, GNorm = 0.9926, lr_0 = 3.2611e-04
Loss = 9.6032e-02, PNorm = 79.9820, GNorm = 0.8337, lr_0 = 3.2589e-04
Loss = 1.0805e-01, PNorm = 79.9873, GNorm = 0.6803, lr_0 = 3.2566e-04
Loss = 1.2082e-01, PNorm = 79.9939, GNorm = 0.5731, lr_0 = 3.2544e-04
Loss = 1.0977e-01, PNorm = 80.0020, GNorm = 0.7726, lr_0 = 3.2522e-04
Loss = 1.1376e-01, PNorm = 80.0150, GNorm = 0.5450, lr_0 = 3.2499e-04
Loss = 1.1670e-01, PNorm = 80.0230, GNorm = 0.7041, lr_0 = 3.2477e-04
Loss = 1.0288e-01, PNorm = 80.0306, GNorm = 0.8873, lr_0 = 3.2455e-04
Loss = 9.8467e-02, PNorm = 80.0402, GNorm = 0.6255, lr_0 = 3.2433e-04
Loss = 1.0487e-01, PNorm = 80.0453, GNorm = 0.7425, lr_0 = 3.2410e-04
Loss = 1.1339e-01, PNorm = 80.0492, GNorm = 0.5786, lr_0 = 3.2388e-04
Loss = 9.5833e-02, PNorm = 80.0569, GNorm = 0.8458, lr_0 = 3.2366e-04
Loss = 1.1510e-01, PNorm = 80.0609, GNorm = 0.5335, lr_0 = 3.2344e-04
Loss = 1.1546e-01, PNorm = 80.0667, GNorm = 0.9223, lr_0 = 3.2322e-04
Loss = 1.1477e-01, PNorm = 80.0739, GNorm = 0.6379, lr_0 = 3.2300e-04
Loss = 9.6892e-02, PNorm = 80.0811, GNorm = 0.6144, lr_0 = 3.2277e-04
Loss = 1.1566e-01, PNorm = 80.0884, GNorm = 0.6459, lr_0 = 3.2255e-04
Loss = 1.0482e-01, PNorm = 80.0978, GNorm = 0.5961, lr_0 = 3.2233e-04
Loss = 1.2007e-01, PNorm = 80.1024, GNorm = 0.9239, lr_0 = 3.2211e-04
Loss = 9.2453e-02, PNorm = 80.1090, GNorm = 0.8470, lr_0 = 3.2189e-04
Loss = 9.9334e-02, PNorm = 80.1114, GNorm = 0.6507, lr_0 = 3.2167e-04
Loss = 1.0803e-01, PNorm = 80.1149, GNorm = 0.7261, lr_0 = 3.2145e-04
Loss = 1.0397e-01, PNorm = 80.1199, GNorm = 0.5331, lr_0 = 3.2123e-04
Loss = 1.0392e-01, PNorm = 80.1275, GNorm = 1.0029, lr_0 = 3.2101e-04
Loss = 1.1314e-01, PNorm = 80.1376, GNorm = 0.6318, lr_0 = 3.2079e-04
Loss = 9.4934e-02, PNorm = 80.1445, GNorm = 0.6690, lr_0 = 3.2057e-04
Loss = 1.2023e-01, PNorm = 80.1510, GNorm = 0.6732, lr_0 = 3.2035e-04
Loss = 1.0903e-01, PNorm = 80.1574, GNorm = 0.7146, lr_0 = 3.2013e-04
Loss = 1.1724e-01, PNorm = 80.1614, GNorm = 0.5868, lr_0 = 3.1991e-04
Loss = 1.0856e-01, PNorm = 80.1682, GNorm = 0.7957, lr_0 = 3.1969e-04
Loss = 1.1253e-01, PNorm = 80.1797, GNorm = 0.8405, lr_0 = 3.1947e-04
Loss = 9.9059e-02, PNorm = 80.1854, GNorm = 0.6046, lr_0 = 3.1925e-04
Loss = 1.0571e-01, PNorm = 80.1920, GNorm = 0.9321, lr_0 = 3.1904e-04
Loss = 1.0436e-01, PNorm = 80.1973, GNorm = 0.7831, lr_0 = 3.1882e-04
Loss = 9.6984e-02, PNorm = 80.2056, GNorm = 0.7752, lr_0 = 3.1860e-04
Loss = 1.2510e-01, PNorm = 80.2119, GNorm = 0.8754, lr_0 = 3.1838e-04
Loss = 1.0356e-01, PNorm = 80.2165, GNorm = 0.5181, lr_0 = 3.1816e-04
Loss = 1.1240e-01, PNorm = 80.2252, GNorm = 0.7270, lr_0 = 3.1794e-04
Loss = 1.0549e-01, PNorm = 80.2351, GNorm = 0.6411, lr_0 = 3.1773e-04
Loss = 1.1910e-01, PNorm = 80.2456, GNorm = 0.7333, lr_0 = 3.1751e-04
Loss = 1.0638e-01, PNorm = 80.2509, GNorm = 0.6300, lr_0 = 3.1729e-04
Loss = 1.0741e-01, PNorm = 80.2564, GNorm = 0.5383, lr_0 = 3.1707e-04
Loss = 1.0995e-01, PNorm = 80.2597, GNorm = 0.6950, lr_0 = 3.1686e-04
Loss = 1.0507e-01, PNorm = 80.2613, GNorm = 0.9858, lr_0 = 3.1664e-04
Loss = 1.1433e-01, PNorm = 80.2672, GNorm = 0.5776, lr_0 = 3.1642e-04
Loss = 1.1299e-01, PNorm = 80.2727, GNorm = 0.5355, lr_0 = 3.1621e-04
Validation mae = 0.240724
Epoch 16
Loss = 1.0281e-01, PNorm = 80.2799, GNorm = 0.8014, lr_0 = 3.1599e-04
Loss = 9.9284e-02, PNorm = 80.2892, GNorm = 0.5918, lr_0 = 3.1577e-04
Loss = 1.0536e-01, PNorm = 80.2983, GNorm = 0.6755, lr_0 = 3.1556e-04
Loss = 1.0985e-01, PNorm = 80.3044, GNorm = 1.2039, lr_0 = 3.1534e-04
Loss = 1.0411e-01, PNorm = 80.3121, GNorm = 0.4921, lr_0 = 3.1512e-04
Loss = 1.1201e-01, PNorm = 80.3225, GNorm = 0.8969, lr_0 = 3.1491e-04
Loss = 9.4462e-02, PNorm = 80.3264, GNorm = 0.7124, lr_0 = 3.1469e-04
Loss = 1.0250e-01, PNorm = 80.3367, GNorm = 0.4957, lr_0 = 3.1448e-04
Loss = 1.0330e-01, PNorm = 80.3423, GNorm = 0.7465, lr_0 = 3.1426e-04
Loss = 1.0450e-01, PNorm = 80.3504, GNorm = 0.6018, lr_0 = 3.1405e-04
Loss = 1.2406e-01, PNorm = 80.3599, GNorm = 0.7466, lr_0 = 3.1383e-04
Loss = 1.0032e-01, PNorm = 80.3727, GNorm = 0.6734, lr_0 = 3.1362e-04
Loss = 9.8780e-02, PNorm = 80.3815, GNorm = 0.8826, lr_0 = 3.1340e-04
Loss = 1.0281e-01, PNorm = 80.3865, GNorm = 0.7549, lr_0 = 3.1319e-04
Loss = 9.7751e-02, PNorm = 80.3905, GNorm = 0.8221, lr_0 = 3.1297e-04
Loss = 9.6335e-02, PNorm = 80.3913, GNorm = 0.6723, lr_0 = 3.1276e-04
Loss = 1.0048e-01, PNorm = 80.3981, GNorm = 0.6806, lr_0 = 3.1254e-04
Loss = 8.7503e-02, PNorm = 80.4044, GNorm = 0.6189, lr_0 = 3.1233e-04
Loss = 1.0615e-01, PNorm = 80.4150, GNorm = 0.6576, lr_0 = 3.1212e-04
Loss = 1.0344e-01, PNorm = 80.4215, GNorm = 0.6902, lr_0 = 3.1190e-04
Loss = 9.1509e-02, PNorm = 80.4284, GNorm = 0.5282, lr_0 = 3.1169e-04
Loss = 9.9801e-02, PNorm = 80.4365, GNorm = 0.7600, lr_0 = 3.1147e-04
Loss = 1.0368e-01, PNorm = 80.4426, GNorm = 0.6698, lr_0 = 3.1126e-04
Loss = 1.0400e-01, PNorm = 80.4490, GNorm = 0.6710, lr_0 = 3.1105e-04
Loss = 1.0920e-01, PNorm = 80.4528, GNorm = 0.9302, lr_0 = 3.1083e-04
Loss = 1.0305e-01, PNorm = 80.4581, GNorm = 0.7897, lr_0 = 3.1062e-04
Loss = 9.8967e-02, PNorm = 80.4617, GNorm = 0.9504, lr_0 = 3.1041e-04
Loss = 1.1118e-01, PNorm = 80.4689, GNorm = 1.0324, lr_0 = 3.1020e-04
Loss = 1.0244e-01, PNorm = 80.4797, GNorm = 0.5740, lr_0 = 3.0998e-04
Loss = 9.6585e-02, PNorm = 80.4873, GNorm = 0.6644, lr_0 = 3.0977e-04
Loss = 9.4742e-02, PNorm = 80.4953, GNorm = 0.6221, lr_0 = 3.0956e-04
Loss = 9.7461e-02, PNorm = 80.5050, GNorm = 0.7128, lr_0 = 3.0935e-04
Loss = 1.0030e-01, PNorm = 80.5128, GNorm = 0.5359, lr_0 = 3.0914e-04
Loss = 1.0013e-01, PNorm = 80.5189, GNorm = 0.6529, lr_0 = 3.0892e-04
Loss = 9.7654e-02, PNorm = 80.5230, GNorm = 0.5715, lr_0 = 3.0871e-04
Loss = 1.1030e-01, PNorm = 80.5280, GNorm = 0.5678, lr_0 = 3.0850e-04
Loss = 9.7943e-02, PNorm = 80.5343, GNorm = 0.5889, lr_0 = 3.0829e-04
Loss = 1.1222e-01, PNorm = 80.5364, GNorm = 0.7405, lr_0 = 3.0808e-04
Loss = 1.1092e-01, PNorm = 80.5417, GNorm = 0.7110, lr_0 = 3.0787e-04
Loss = 1.2506e-01, PNorm = 80.5470, GNorm = 0.5891, lr_0 = 3.0766e-04
Loss = 9.9574e-02, PNorm = 80.5529, GNorm = 0.6473, lr_0 = 3.0745e-04
Loss = 9.5660e-02, PNorm = 80.5581, GNorm = 0.6552, lr_0 = 3.0723e-04
Loss = 1.2567e-01, PNorm = 80.5655, GNorm = 0.5662, lr_0 = 3.0702e-04
Loss = 1.0355e-01, PNorm = 80.5779, GNorm = 0.9605, lr_0 = 3.0681e-04
Loss = 1.0560e-01, PNorm = 80.5870, GNorm = 0.6294, lr_0 = 3.0660e-04
Loss = 1.1317e-01, PNorm = 80.5940, GNorm = 0.7602, lr_0 = 3.0639e-04
Loss = 1.0583e-01, PNorm = 80.6039, GNorm = 0.7115, lr_0 = 3.0618e-04
Loss = 1.0633e-01, PNorm = 80.6106, GNorm = 0.5935, lr_0 = 3.0597e-04
Loss = 9.9683e-02, PNorm = 80.6168, GNorm = 0.6351, lr_0 = 3.0576e-04
Loss = 9.8516e-02, PNorm = 80.6231, GNorm = 0.7006, lr_0 = 3.0555e-04
Loss = 1.0662e-01, PNorm = 80.6293, GNorm = 0.9447, lr_0 = 3.0535e-04
Loss = 9.4673e-02, PNorm = 80.6393, GNorm = 0.4410, lr_0 = 3.0514e-04
Loss = 8.5205e-02, PNorm = 80.6466, GNorm = 0.6672, lr_0 = 3.0493e-04
Loss = 1.0268e-01, PNorm = 80.6504, GNorm = 0.7602, lr_0 = 3.0472e-04
Loss = 8.9634e-02, PNorm = 80.6519, GNorm = 0.5140, lr_0 = 3.0451e-04
Loss = 9.9888e-02, PNorm = 80.6530, GNorm = 0.6474, lr_0 = 3.0430e-04
Loss = 1.1367e-01, PNorm = 80.6594, GNorm = 0.6195, lr_0 = 3.0409e-04
Loss = 1.1265e-01, PNorm = 80.6673, GNorm = 0.7100, lr_0 = 3.0388e-04
Loss = 9.0760e-02, PNorm = 80.6736, GNorm = 0.5424, lr_0 = 3.0368e-04
Loss = 1.1030e-01, PNorm = 80.6793, GNorm = 0.9532, lr_0 = 3.0347e-04
Loss = 1.0456e-01, PNorm = 80.6885, GNorm = 0.6375, lr_0 = 3.0326e-04
Loss = 1.0607e-01, PNorm = 80.6950, GNorm = 1.1900, lr_0 = 3.0305e-04
Loss = 9.4707e-02, PNorm = 80.6998, GNorm = 0.7831, lr_0 = 3.0284e-04
Loss = 1.1092e-01, PNorm = 80.7063, GNorm = 0.6441, lr_0 = 3.0264e-04
Loss = 1.1143e-01, PNorm = 80.7162, GNorm = 0.7179, lr_0 = 3.0243e-04
Loss = 1.0323e-01, PNorm = 80.7229, GNorm = 0.5384, lr_0 = 3.0222e-04
Loss = 9.5507e-02, PNorm = 80.7266, GNorm = 0.6523, lr_0 = 3.0202e-04
Loss = 1.0038e-01, PNorm = 80.7348, GNorm = 0.5943, lr_0 = 3.0181e-04
Loss = 1.0860e-01, PNorm = 80.7447, GNorm = 0.7036, lr_0 = 3.0160e-04
Loss = 1.1071e-01, PNorm = 80.7539, GNorm = 0.9252, lr_0 = 3.0140e-04
Loss = 9.4490e-02, PNorm = 80.7621, GNorm = 0.5647, lr_0 = 3.0119e-04
Loss = 1.0056e-01, PNorm = 80.7711, GNorm = 0.6578, lr_0 = 3.0098e-04
Loss = 1.2038e-01, PNorm = 80.7814, GNorm = 1.0829, lr_0 = 3.0078e-04
Loss = 9.6257e-02, PNorm = 80.7888, GNorm = 0.6206, lr_0 = 3.0057e-04
Loss = 9.9864e-02, PNorm = 80.7904, GNorm = 0.8797, lr_0 = 3.0036e-04
Loss = 9.3857e-02, PNorm = 80.7991, GNorm = 0.5340, lr_0 = 3.0016e-04
Loss = 1.0440e-01, PNorm = 80.8011, GNorm = 0.6535, lr_0 = 2.9995e-04
Loss = 1.0929e-01, PNorm = 80.8059, GNorm = 0.7499, lr_0 = 2.9975e-04
Loss = 1.0607e-01, PNorm = 80.8141, GNorm = 0.8093, lr_0 = 2.9954e-04
Loss = 1.0086e-01, PNorm = 80.8176, GNorm = 0.6642, lr_0 = 2.9934e-04
Loss = 1.2158e-01, PNorm = 80.8243, GNorm = 0.8732, lr_0 = 2.9913e-04
Loss = 1.1346e-01, PNorm = 80.8315, GNorm = 0.7357, lr_0 = 2.9893e-04
Loss = 1.0335e-01, PNorm = 80.8379, GNorm = 0.6239, lr_0 = 2.9872e-04
Loss = 1.1310e-01, PNorm = 80.8401, GNorm = 0.6710, lr_0 = 2.9852e-04
Loss = 1.0211e-01, PNorm = 80.8462, GNorm = 0.5666, lr_0 = 2.9831e-04
Loss = 9.8050e-02, PNorm = 80.8517, GNorm = 0.5305, lr_0 = 2.9811e-04
Loss = 1.2861e-01, PNorm = 80.8558, GNorm = 0.7395, lr_0 = 2.9790e-04
Loss = 1.0570e-01, PNorm = 80.8623, GNorm = 0.7469, lr_0 = 2.9770e-04
Loss = 9.2093e-02, PNorm = 80.8709, GNorm = 0.5208, lr_0 = 2.9750e-04
Loss = 9.4851e-02, PNorm = 80.8751, GNorm = 0.5611, lr_0 = 2.9729e-04
Loss = 1.1113e-01, PNorm = 80.8823, GNorm = 0.7307, lr_0 = 2.9709e-04
Loss = 1.1236e-01, PNorm = 80.8874, GNorm = 0.5193, lr_0 = 2.9689e-04
Loss = 1.0796e-01, PNorm = 80.8952, GNorm = 0.6011, lr_0 = 2.9668e-04
Loss = 1.0671e-01, PNorm = 80.9017, GNorm = 0.6229, lr_0 = 2.9648e-04
Loss = 1.0347e-01, PNorm = 80.9068, GNorm = 0.5825, lr_0 = 2.9628e-04
Loss = 1.0931e-01, PNorm = 80.9161, GNorm = 0.6636, lr_0 = 2.9607e-04
Loss = 1.1108e-01, PNorm = 80.9257, GNorm = 0.6027, lr_0 = 2.9587e-04
Loss = 1.0714e-01, PNorm = 80.9352, GNorm = 0.8817, lr_0 = 2.9567e-04
Loss = 1.0157e-01, PNorm = 80.9412, GNorm = 0.7655, lr_0 = 2.9546e-04
Loss = 1.0102e-01, PNorm = 80.9507, GNorm = 0.6721, lr_0 = 2.9526e-04
Loss = 1.0046e-01, PNorm = 80.9572, GNorm = 0.8723, lr_0 = 2.9506e-04
Loss = 1.0655e-01, PNorm = 80.9623, GNorm = 0.5031, lr_0 = 2.9486e-04
Loss = 1.1459e-01, PNorm = 80.9717, GNorm = 0.5363, lr_0 = 2.9466e-04
Loss = 1.0656e-01, PNorm = 80.9797, GNorm = 0.7617, lr_0 = 2.9445e-04
Loss = 1.1817e-01, PNorm = 80.9863, GNorm = 0.7098, lr_0 = 2.9425e-04
Loss = 1.1028e-01, PNorm = 80.9916, GNorm = 0.7438, lr_0 = 2.9405e-04
Loss = 9.7905e-02, PNorm = 80.9975, GNorm = 0.6063, lr_0 = 2.9385e-04
Loss = 1.0003e-01, PNorm = 81.0028, GNorm = 0.8575, lr_0 = 2.9365e-04
Loss = 9.8413e-02, PNorm = 81.0097, GNorm = 0.7161, lr_0 = 2.9345e-04
Loss = 9.4798e-02, PNorm = 81.0127, GNorm = 0.5386, lr_0 = 2.9325e-04
Loss = 1.0332e-01, PNorm = 81.0188, GNorm = 1.0042, lr_0 = 2.9305e-04
Loss = 1.0751e-01, PNorm = 81.0258, GNorm = 0.8355, lr_0 = 2.9284e-04
Loss = 1.0615e-01, PNorm = 81.0339, GNorm = 0.6833, lr_0 = 2.9264e-04
Loss = 1.0793e-01, PNorm = 81.0386, GNorm = 0.6042, lr_0 = 2.9244e-04
Loss = 1.0477e-01, PNorm = 81.0430, GNorm = 0.8120, lr_0 = 2.9224e-04
Loss = 1.0217e-01, PNorm = 81.0487, GNorm = 0.7822, lr_0 = 2.9204e-04
Loss = 9.6297e-02, PNorm = 81.0548, GNorm = 0.6817, lr_0 = 2.9184e-04
Loss = 1.2083e-01, PNorm = 81.0600, GNorm = 0.8333, lr_0 = 2.9164e-04
Loss = 1.1039e-01, PNorm = 81.0701, GNorm = 0.6331, lr_0 = 2.9144e-04
Loss = 1.0677e-01, PNorm = 81.0708, GNorm = 0.5653, lr_0 = 2.9124e-04
Validation mae = 0.232166
Epoch 17
Loss = 9.8481e-02, PNorm = 81.0737, GNorm = 0.6403, lr_0 = 2.9104e-04
Loss = 1.0193e-01, PNorm = 81.0817, GNorm = 0.5326, lr_0 = 2.9084e-04
Loss = 1.0999e-01, PNorm = 81.0872, GNorm = 1.2601, lr_0 = 2.9065e-04
Loss = 1.0034e-01, PNorm = 81.0945, GNorm = 0.5588, lr_0 = 2.9045e-04
Loss = 9.5457e-02, PNorm = 81.1029, GNorm = 0.8834, lr_0 = 2.9025e-04
Loss = 9.0529e-02, PNorm = 81.1087, GNorm = 0.6836, lr_0 = 2.9005e-04
Loss = 9.7266e-02, PNorm = 81.1124, GNorm = 0.6835, lr_0 = 2.8985e-04
Loss = 1.0843e-01, PNorm = 81.1155, GNorm = 0.5699, lr_0 = 2.8965e-04
Loss = 9.9208e-02, PNorm = 81.1199, GNorm = 0.6558, lr_0 = 2.8945e-04
Loss = 1.0949e-01, PNorm = 81.1261, GNorm = 0.5456, lr_0 = 2.8925e-04
Loss = 9.9187e-02, PNorm = 81.1345, GNorm = 0.6097, lr_0 = 2.8906e-04
Loss = 9.7215e-02, PNorm = 81.1410, GNorm = 0.6591, lr_0 = 2.8886e-04
Loss = 8.8682e-02, PNorm = 81.1481, GNorm = 0.5838, lr_0 = 2.8866e-04
Loss = 1.1028e-01, PNorm = 81.1539, GNorm = 0.9110, lr_0 = 2.8846e-04
Loss = 1.0407e-01, PNorm = 81.1621, GNorm = 0.6409, lr_0 = 2.8826e-04
Loss = 1.1909e-01, PNorm = 81.1717, GNorm = 0.8934, lr_0 = 2.8807e-04
Loss = 9.1680e-02, PNorm = 81.1795, GNorm = 0.6489, lr_0 = 2.8787e-04
Loss = 8.6673e-02, PNorm = 81.1866, GNorm = 0.5868, lr_0 = 2.8767e-04
Loss = 1.0108e-01, PNorm = 81.1904, GNorm = 0.5358, lr_0 = 2.8748e-04
Loss = 9.3622e-02, PNorm = 81.1975, GNorm = 0.6096, lr_0 = 2.8728e-04
Loss = 1.0776e-01, PNorm = 81.2054, GNorm = 0.5763, lr_0 = 2.8708e-04
Loss = 1.0757e-01, PNorm = 81.2100, GNorm = 0.5777, lr_0 = 2.8689e-04
Loss = 9.0154e-02, PNorm = 81.2150, GNorm = 0.8383, lr_0 = 2.8669e-04
Loss = 1.0207e-01, PNorm = 81.2208, GNorm = 0.9986, lr_0 = 2.8649e-04
Loss = 9.7402e-02, PNorm = 81.2263, GNorm = 0.7972, lr_0 = 2.8630e-04
Loss = 9.2116e-02, PNorm = 81.2317, GNorm = 0.5763, lr_0 = 2.8610e-04
Loss = 1.0072e-01, PNorm = 81.2348, GNorm = 0.5416, lr_0 = 2.8590e-04
Loss = 9.8784e-02, PNorm = 81.2407, GNorm = 0.6887, lr_0 = 2.8571e-04
Loss = 1.0510e-01, PNorm = 81.2480, GNorm = 0.5090, lr_0 = 2.8551e-04
Loss = 9.6878e-02, PNorm = 81.2532, GNorm = 0.6127, lr_0 = 2.8532e-04
Loss = 9.1655e-02, PNorm = 81.2586, GNorm = 0.8482, lr_0 = 2.8512e-04
Loss = 9.9490e-02, PNorm = 81.2624, GNorm = 0.7944, lr_0 = 2.8493e-04
Loss = 1.0709e-01, PNorm = 81.2679, GNorm = 0.6504, lr_0 = 2.8473e-04
Loss = 9.4194e-02, PNorm = 81.2730, GNorm = 0.5784, lr_0 = 2.8454e-04
Loss = 1.0320e-01, PNorm = 81.2757, GNorm = 0.8158, lr_0 = 2.8434e-04
Loss = 1.0184e-01, PNorm = 81.2842, GNorm = 0.4451, lr_0 = 2.8415e-04
Loss = 9.9790e-02, PNorm = 81.2939, GNorm = 0.8037, lr_0 = 2.8395e-04
Loss = 1.0777e-01, PNorm = 81.3017, GNorm = 0.9969, lr_0 = 2.8376e-04
Loss = 9.8551e-02, PNorm = 81.3071, GNorm = 0.6930, lr_0 = 2.8356e-04
Loss = 9.8455e-02, PNorm = 81.3115, GNorm = 0.9829, lr_0 = 2.8337e-04
Loss = 1.0737e-01, PNorm = 81.3160, GNorm = 0.6021, lr_0 = 2.8317e-04
Loss = 1.0238e-01, PNorm = 81.3184, GNorm = 1.0360, lr_0 = 2.8298e-04
Loss = 9.4322e-02, PNorm = 81.3240, GNorm = 0.7469, lr_0 = 2.8279e-04
Loss = 1.0054e-01, PNorm = 81.3301, GNorm = 0.5527, lr_0 = 2.8259e-04
Loss = 1.2577e-01, PNorm = 81.3345, GNorm = 0.9138, lr_0 = 2.8240e-04
Loss = 1.0684e-01, PNorm = 81.3454, GNorm = 0.8634, lr_0 = 2.8221e-04
Loss = 9.9541e-02, PNorm = 81.3514, GNorm = 0.5288, lr_0 = 2.8201e-04
Loss = 9.4467e-02, PNorm = 81.3558, GNorm = 0.5357, lr_0 = 2.8182e-04
Loss = 1.0080e-01, PNorm = 81.3582, GNorm = 0.4417, lr_0 = 2.8163e-04
Loss = 9.9627e-02, PNorm = 81.3612, GNorm = 0.4792, lr_0 = 2.8143e-04
Loss = 9.4787e-02, PNorm = 81.3655, GNorm = 0.4772, lr_0 = 2.8124e-04
Loss = 1.0166e-01, PNorm = 81.3708, GNorm = 0.5407, lr_0 = 2.8105e-04
Loss = 9.5355e-02, PNorm = 81.3757, GNorm = 1.1163, lr_0 = 2.8085e-04
Loss = 1.0091e-01, PNorm = 81.3818, GNorm = 0.6838, lr_0 = 2.8066e-04
Loss = 1.1130e-01, PNorm = 81.3899, GNorm = 0.8237, lr_0 = 2.8047e-04
Loss = 1.0369e-01, PNorm = 81.3987, GNorm = 0.5535, lr_0 = 2.8028e-04
Loss = 9.8821e-02, PNorm = 81.4039, GNorm = 0.7734, lr_0 = 2.8009e-04
Loss = 9.0562e-02, PNorm = 81.4056, GNorm = 0.6859, lr_0 = 2.7989e-04
Loss = 1.0409e-01, PNorm = 81.4101, GNorm = 1.0297, lr_0 = 2.7970e-04
Loss = 9.2272e-02, PNorm = 81.4168, GNorm = 0.5214, lr_0 = 2.7951e-04
Loss = 9.4710e-02, PNorm = 81.4236, GNorm = 0.5205, lr_0 = 2.7932e-04
Loss = 9.1366e-02, PNorm = 81.4307, GNorm = 0.5828, lr_0 = 2.7913e-04
Loss = 1.0484e-01, PNorm = 81.4355, GNorm = 0.7574, lr_0 = 2.7894e-04
Loss = 8.9854e-02, PNorm = 81.4407, GNorm = 0.5273, lr_0 = 2.7875e-04
Loss = 1.0536e-01, PNorm = 81.4462, GNorm = 0.8143, lr_0 = 2.7855e-04
Loss = 1.1068e-01, PNorm = 81.4550, GNorm = 1.0114, lr_0 = 2.7836e-04
Loss = 1.0304e-01, PNorm = 81.4628, GNorm = 0.5556, lr_0 = 2.7817e-04
Loss = 1.0563e-01, PNorm = 81.4653, GNorm = 0.6751, lr_0 = 2.7798e-04
Loss = 1.0772e-01, PNorm = 81.4689, GNorm = 0.6064, lr_0 = 2.7779e-04
Loss = 1.0595e-01, PNorm = 81.4765, GNorm = 0.6369, lr_0 = 2.7760e-04
Loss = 8.6685e-02, PNorm = 81.4811, GNorm = 0.6431, lr_0 = 2.7741e-04
Loss = 9.9514e-02, PNorm = 81.4891, GNorm = 0.5891, lr_0 = 2.7722e-04
Loss = 9.9417e-02, PNorm = 81.4934, GNorm = 0.6229, lr_0 = 2.7703e-04
Loss = 9.5092e-02, PNorm = 81.4993, GNorm = 0.6731, lr_0 = 2.7684e-04
Loss = 9.7628e-02, PNorm = 81.5028, GNorm = 0.7509, lr_0 = 2.7665e-04
Loss = 1.0947e-01, PNorm = 81.5068, GNorm = 0.8510, lr_0 = 2.7646e-04
Loss = 1.1569e-01, PNorm = 81.5124, GNorm = 0.6343, lr_0 = 2.7627e-04
Loss = 1.0208e-01, PNorm = 81.5192, GNorm = 0.7969, lr_0 = 2.7608e-04
Loss = 8.8118e-02, PNorm = 81.5246, GNorm = 0.6461, lr_0 = 2.7590e-04
Loss = 1.0788e-01, PNorm = 81.5304, GNorm = 0.8202, lr_0 = 2.7571e-04
Loss = 1.0117e-01, PNorm = 81.5345, GNorm = 0.4937, lr_0 = 2.7552e-04
Loss = 1.1657e-01, PNorm = 81.5410, GNorm = 0.6447, lr_0 = 2.7533e-04
Loss = 9.8370e-02, PNorm = 81.5462, GNorm = 0.7712, lr_0 = 2.7514e-04
Loss = 1.0062e-01, PNorm = 81.5531, GNorm = 0.5777, lr_0 = 2.7495e-04
Loss = 9.7156e-02, PNorm = 81.5620, GNorm = 0.7204, lr_0 = 2.7476e-04
Loss = 1.1696e-01, PNorm = 81.5692, GNorm = 0.6119, lr_0 = 2.7457e-04
Loss = 8.9503e-02, PNorm = 81.5791, GNorm = 0.4969, lr_0 = 2.7439e-04
Loss = 1.0037e-01, PNorm = 81.5922, GNorm = 0.6584, lr_0 = 2.7420e-04
Loss = 9.7994e-02, PNorm = 81.5978, GNorm = 0.7717, lr_0 = 2.7401e-04
Loss = 1.0793e-01, PNorm = 81.6023, GNorm = 0.7634, lr_0 = 2.7382e-04
Loss = 1.1787e-01, PNorm = 81.6089, GNorm = 0.7972, lr_0 = 2.7364e-04
Loss = 1.1429e-01, PNorm = 81.6166, GNorm = 0.5565, lr_0 = 2.7345e-04
Loss = 9.5767e-02, PNorm = 81.6220, GNorm = 0.6379, lr_0 = 2.7326e-04
Loss = 9.4529e-02, PNorm = 81.6285, GNorm = 0.5841, lr_0 = 2.7307e-04
Loss = 1.0650e-01, PNorm = 81.6340, GNorm = 0.8074, lr_0 = 2.7289e-04
Loss = 1.0451e-01, PNorm = 81.6361, GNorm = 0.6671, lr_0 = 2.7270e-04
Loss = 8.8513e-02, PNorm = 81.6415, GNorm = 0.6358, lr_0 = 2.7251e-04
Loss = 9.1880e-02, PNorm = 81.6479, GNorm = 0.9897, lr_0 = 2.7233e-04
Loss = 1.0774e-01, PNorm = 81.6526, GNorm = 0.6349, lr_0 = 2.7214e-04
Loss = 1.0377e-01, PNorm = 81.6563, GNorm = 0.7739, lr_0 = 2.7195e-04
Loss = 9.5075e-02, PNorm = 81.6595, GNorm = 0.5083, lr_0 = 2.7177e-04
Loss = 9.8720e-02, PNorm = 81.6660, GNorm = 0.6701, lr_0 = 2.7158e-04
Loss = 9.6862e-02, PNorm = 81.6710, GNorm = 1.2248, lr_0 = 2.7139e-04
Loss = 1.0446e-01, PNorm = 81.6747, GNorm = 0.4990, lr_0 = 2.7121e-04
Loss = 1.1148e-01, PNorm = 81.6801, GNorm = 0.6554, lr_0 = 2.7102e-04
Loss = 8.8963e-02, PNorm = 81.6863, GNorm = 0.6252, lr_0 = 2.7084e-04
Loss = 8.9992e-02, PNorm = 81.6918, GNorm = 0.7857, lr_0 = 2.7065e-04
Loss = 1.0933e-01, PNorm = 81.6964, GNorm = 0.9062, lr_0 = 2.7047e-04
Loss = 1.0225e-01, PNorm = 81.6995, GNorm = 0.5839, lr_0 = 2.7028e-04
Loss = 9.3638e-02, PNorm = 81.7056, GNorm = 0.5261, lr_0 = 2.7010e-04
Loss = 1.0739e-01, PNorm = 81.7106, GNorm = 0.8009, lr_0 = 2.6991e-04
Loss = 1.0825e-01, PNorm = 81.7139, GNorm = 0.8979, lr_0 = 2.6973e-04
Loss = 1.0259e-01, PNorm = 81.7232, GNorm = 1.1157, lr_0 = 2.6954e-04
Loss = 9.7015e-02, PNorm = 81.7280, GNorm = 0.7520, lr_0 = 2.6936e-04
Loss = 1.1245e-01, PNorm = 81.7313, GNorm = 0.6253, lr_0 = 2.6917e-04
Loss = 1.0776e-01, PNorm = 81.7365, GNorm = 0.7501, lr_0 = 2.6899e-04
Loss = 9.0563e-02, PNorm = 81.7399, GNorm = 0.5511, lr_0 = 2.6880e-04
Loss = 9.8510e-02, PNorm = 81.7454, GNorm = 0.6393, lr_0 = 2.6862e-04
Loss = 9.2889e-02, PNorm = 81.7475, GNorm = 0.6828, lr_0 = 2.6844e-04
Loss = 1.0266e-01, PNorm = 81.7543, GNorm = 0.6029, lr_0 = 2.6825e-04
Validation mae = 0.229969
Epoch 18
Loss = 1.0622e-01, PNorm = 81.7628, GNorm = 0.6637, lr_0 = 2.6807e-04
Loss = 1.1127e-01, PNorm = 81.7702, GNorm = 0.7153, lr_0 = 2.6788e-04
Loss = 8.3803e-02, PNorm = 81.7756, GNorm = 0.6039, lr_0 = 2.6770e-04
Loss = 9.9734e-02, PNorm = 81.7771, GNorm = 0.6072, lr_0 = 2.6752e-04
Loss = 9.0011e-02, PNorm = 81.7825, GNorm = 0.6536, lr_0 = 2.6733e-04
Loss = 8.7182e-02, PNorm = 81.7900, GNorm = 0.7519, lr_0 = 2.6715e-04
Loss = 1.0887e-01, PNorm = 81.7990, GNorm = 0.8936, lr_0 = 2.6697e-04
Loss = 1.0714e-01, PNorm = 81.8086, GNorm = 0.5572, lr_0 = 2.6678e-04
Loss = 9.5299e-02, PNorm = 81.8171, GNorm = 0.6686, lr_0 = 2.6660e-04
Loss = 9.8502e-02, PNorm = 81.8222, GNorm = 0.6250, lr_0 = 2.6642e-04
Loss = 1.0187e-01, PNorm = 81.8264, GNorm = 0.5372, lr_0 = 2.6624e-04
Loss = 1.0042e-01, PNorm = 81.8334, GNorm = 0.7847, lr_0 = 2.6605e-04
Loss = 8.1266e-02, PNorm = 81.8390, GNorm = 0.5931, lr_0 = 2.6587e-04
Loss = 8.7166e-02, PNorm = 81.8433, GNorm = 0.5493, lr_0 = 2.6569e-04
Loss = 9.8620e-02, PNorm = 81.8464, GNorm = 0.8091, lr_0 = 2.6551e-04
Loss = 8.7860e-02, PNorm = 81.8529, GNorm = 0.7628, lr_0 = 2.6533e-04
Loss = 9.7833e-02, PNorm = 81.8581, GNorm = 0.6759, lr_0 = 2.6514e-04
Loss = 1.0042e-01, PNorm = 81.8635, GNorm = 0.5134, lr_0 = 2.6496e-04
Loss = 8.4089e-02, PNorm = 81.8663, GNorm = 0.5173, lr_0 = 2.6478e-04
Loss = 1.0255e-01, PNorm = 81.8707, GNorm = 0.5715, lr_0 = 2.6460e-04
Loss = 1.1039e-01, PNorm = 81.8747, GNorm = 0.7411, lr_0 = 2.6442e-04
Loss = 9.7146e-02, PNorm = 81.8785, GNorm = 0.5891, lr_0 = 2.6424e-04
Loss = 1.0029e-01, PNorm = 81.8854, GNorm = 0.5498, lr_0 = 2.6406e-04
Loss = 8.8189e-02, PNorm = 81.8900, GNorm = 0.6820, lr_0 = 2.6388e-04
Loss = 9.6772e-02, PNorm = 81.8942, GNorm = 0.5393, lr_0 = 2.6369e-04
Loss = 9.2908e-02, PNorm = 81.9010, GNorm = 0.5210, lr_0 = 2.6351e-04
Loss = 1.0394e-01, PNorm = 81.9084, GNorm = 0.6862, lr_0 = 2.6333e-04
Loss = 1.1356e-01, PNorm = 81.9103, GNorm = 0.8254, lr_0 = 2.6315e-04
Loss = 9.5585e-02, PNorm = 81.9138, GNorm = 0.5691, lr_0 = 2.6297e-04
Loss = 1.0338e-01, PNorm = 81.9179, GNorm = 0.6477, lr_0 = 2.6279e-04
Loss = 9.2921e-02, PNorm = 81.9222, GNorm = 0.5882, lr_0 = 2.6261e-04
Loss = 9.8025e-02, PNorm = 81.9289, GNorm = 0.5546, lr_0 = 2.6243e-04
Loss = 8.6555e-02, PNorm = 81.9363, GNorm = 0.4609, lr_0 = 2.6225e-04
Loss = 9.6244e-02, PNorm = 81.9406, GNorm = 0.5445, lr_0 = 2.6207e-04
Loss = 1.0031e-01, PNorm = 81.9477, GNorm = 0.9083, lr_0 = 2.6189e-04
Loss = 7.9072e-02, PNorm = 81.9577, GNorm = 0.5803, lr_0 = 2.6171e-04
Loss = 1.0137e-01, PNorm = 81.9639, GNorm = 0.4623, lr_0 = 2.6153e-04
Loss = 9.5398e-02, PNorm = 81.9663, GNorm = 0.7463, lr_0 = 2.6136e-04
Loss = 1.1036e-01, PNorm = 81.9776, GNorm = 1.0832, lr_0 = 2.6118e-04
Loss = 9.0621e-02, PNorm = 81.9823, GNorm = 0.5494, lr_0 = 2.6100e-04
Loss = 9.3898e-02, PNorm = 81.9872, GNorm = 0.7683, lr_0 = 2.6082e-04
Loss = 1.0308e-01, PNorm = 81.9915, GNorm = 0.9225, lr_0 = 2.6064e-04
Loss = 9.5164e-02, PNorm = 82.0002, GNorm = 0.7995, lr_0 = 2.6046e-04
Loss = 1.0186e-01, PNorm = 82.0047, GNorm = 0.9977, lr_0 = 2.6028e-04
Loss = 9.8470e-02, PNorm = 82.0076, GNorm = 0.6053, lr_0 = 2.6011e-04
Loss = 1.0325e-01, PNorm = 82.0105, GNorm = 0.6226, lr_0 = 2.5993e-04
Loss = 8.6572e-02, PNorm = 82.0097, GNorm = 0.5227, lr_0 = 2.5975e-04
Loss = 9.1106e-02, PNorm = 82.0119, GNorm = 0.5688, lr_0 = 2.5957e-04
Loss = 9.3496e-02, PNorm = 82.0182, GNorm = 0.7379, lr_0 = 2.5939e-04
Loss = 9.9497e-02, PNorm = 82.0235, GNorm = 0.6103, lr_0 = 2.5922e-04
Loss = 1.0204e-01, PNorm = 82.0290, GNorm = 0.6076, lr_0 = 2.5904e-04
Loss = 8.2118e-02, PNorm = 82.0342, GNorm = 0.5328, lr_0 = 2.5886e-04
Loss = 9.9271e-02, PNorm = 82.0415, GNorm = 0.5745, lr_0 = 2.5868e-04
Loss = 9.7803e-02, PNorm = 82.0481, GNorm = 0.5663, lr_0 = 2.5851e-04
Loss = 9.4308e-02, PNorm = 82.0557, GNorm = 0.5468, lr_0 = 2.5833e-04
Loss = 9.5398e-02, PNorm = 82.0657, GNorm = 0.5488, lr_0 = 2.5815e-04
Loss = 9.7773e-02, PNorm = 82.0716, GNorm = 0.7726, lr_0 = 2.5797e-04
Loss = 9.2376e-02, PNorm = 82.0751, GNorm = 0.7352, lr_0 = 2.5780e-04
Loss = 8.7949e-02, PNorm = 82.0781, GNorm = 0.6913, lr_0 = 2.5762e-04
Loss = 1.0503e-01, PNorm = 82.0862, GNorm = 0.6698, lr_0 = 2.5745e-04
Loss = 9.2255e-02, PNorm = 82.0875, GNorm = 0.6311, lr_0 = 2.5727e-04
Loss = 1.0525e-01, PNorm = 82.0971, GNorm = 0.8602, lr_0 = 2.5709e-04
Loss = 1.0549e-01, PNorm = 82.1017, GNorm = 0.7238, lr_0 = 2.5692e-04
Loss = 9.3289e-02, PNorm = 82.1093, GNorm = 0.5691, lr_0 = 2.5674e-04
Loss = 8.1254e-02, PNorm = 82.1183, GNorm = 0.6278, lr_0 = 2.5656e-04
Loss = 1.0533e-01, PNorm = 82.1230, GNorm = 0.6792, lr_0 = 2.5639e-04
Loss = 8.7091e-02, PNorm = 82.1300, GNorm = 0.5497, lr_0 = 2.5621e-04
Loss = 9.2986e-02, PNorm = 82.1346, GNorm = 0.5615, lr_0 = 2.5604e-04
Loss = 9.7367e-02, PNorm = 82.1404, GNorm = 0.8156, lr_0 = 2.5586e-04
Loss = 1.0306e-01, PNorm = 82.1425, GNorm = 0.5850, lr_0 = 2.5569e-04
Loss = 9.2117e-02, PNorm = 82.1473, GNorm = 0.6184, lr_0 = 2.5551e-04
Loss = 9.5929e-02, PNorm = 82.1498, GNorm = 0.7210, lr_0 = 2.5534e-04
Loss = 9.9370e-02, PNorm = 82.1539, GNorm = 0.5770, lr_0 = 2.5516e-04
Loss = 8.8091e-02, PNorm = 82.1574, GNorm = 0.6003, lr_0 = 2.5499e-04
Loss = 9.0702e-02, PNorm = 82.1623, GNorm = 0.5477, lr_0 = 2.5481e-04
Loss = 1.0246e-01, PNorm = 82.1657, GNorm = 0.8455, lr_0 = 2.5464e-04
Loss = 8.4665e-02, PNorm = 82.1709, GNorm = 0.6002, lr_0 = 2.5446e-04
Loss = 1.0547e-01, PNorm = 82.1747, GNorm = 0.4938, lr_0 = 2.5429e-04
Loss = 1.0034e-01, PNorm = 82.1771, GNorm = 0.6355, lr_0 = 2.5411e-04
Loss = 1.0675e-01, PNorm = 82.1801, GNorm = 0.4577, lr_0 = 2.5394e-04
Loss = 1.0335e-01, PNorm = 82.1860, GNorm = 0.6965, lr_0 = 2.5377e-04
Loss = 9.2693e-02, PNorm = 82.1929, GNorm = 0.7023, lr_0 = 2.5359e-04
Loss = 9.2772e-02, PNorm = 82.1976, GNorm = 0.6874, lr_0 = 2.5342e-04
Loss = 8.6056e-02, PNorm = 82.1992, GNorm = 0.5598, lr_0 = 2.5325e-04
Loss = 8.7087e-02, PNorm = 82.2024, GNorm = 0.7077, lr_0 = 2.5307e-04
Loss = 9.5058e-02, PNorm = 82.2050, GNorm = 0.6937, lr_0 = 2.5290e-04
Loss = 9.7778e-02, PNorm = 82.2105, GNorm = 0.6663, lr_0 = 2.5273e-04
Loss = 1.0156e-01, PNorm = 82.2130, GNorm = 0.5633, lr_0 = 2.5255e-04
Loss = 9.4380e-02, PNorm = 82.2159, GNorm = 0.6079, lr_0 = 2.5238e-04
Loss = 9.0699e-02, PNorm = 82.2204, GNorm = 0.6342, lr_0 = 2.5221e-04
Loss = 1.0200e-01, PNorm = 82.2278, GNorm = 0.9180, lr_0 = 2.5203e-04
Loss = 1.0407e-01, PNorm = 82.2319, GNorm = 0.6842, lr_0 = 2.5186e-04
Loss = 9.7214e-02, PNorm = 82.2360, GNorm = 0.6693, lr_0 = 2.5169e-04
Loss = 1.0406e-01, PNorm = 82.2413, GNorm = 0.4858, lr_0 = 2.5152e-04
Loss = 9.4350e-02, PNorm = 82.2434, GNorm = 0.6040, lr_0 = 2.5134e-04
Loss = 1.1234e-01, PNorm = 82.2507, GNorm = 0.7922, lr_0 = 2.5117e-04
Loss = 1.0632e-01, PNorm = 82.2555, GNorm = 0.5653, lr_0 = 2.5100e-04
Loss = 9.4177e-02, PNorm = 82.2641, GNorm = 0.6405, lr_0 = 2.5083e-04
Loss = 9.6411e-02, PNorm = 82.2677, GNorm = 1.0520, lr_0 = 2.5066e-04
Loss = 1.0568e-01, PNorm = 82.2671, GNorm = 0.6910, lr_0 = 2.5048e-04
Loss = 9.7142e-02, PNorm = 82.2729, GNorm = 0.5921, lr_0 = 2.5031e-04
Loss = 9.2609e-02, PNorm = 82.2754, GNorm = 0.6223, lr_0 = 2.5014e-04
Loss = 9.3971e-02, PNorm = 82.2808, GNorm = 0.6120, lr_0 = 2.4997e-04
Loss = 9.7876e-02, PNorm = 82.2877, GNorm = 0.6639, lr_0 = 2.4980e-04
Loss = 8.5187e-02, PNorm = 82.2939, GNorm = 0.4538, lr_0 = 2.4963e-04
Loss = 8.9636e-02, PNorm = 82.3000, GNorm = 0.6792, lr_0 = 2.4946e-04
Loss = 8.7004e-02, PNorm = 82.3035, GNorm = 0.6218, lr_0 = 2.4929e-04
Loss = 1.0247e-01, PNorm = 82.3063, GNorm = 0.5363, lr_0 = 2.4911e-04
Loss = 1.0192e-01, PNorm = 82.3093, GNorm = 0.6876, lr_0 = 2.4894e-04
Loss = 1.1384e-01, PNorm = 82.3147, GNorm = 0.7757, lr_0 = 2.4877e-04
Loss = 1.0659e-01, PNorm = 82.3210, GNorm = 0.6081, lr_0 = 2.4860e-04
Loss = 9.8597e-02, PNorm = 82.3282, GNorm = 0.7223, lr_0 = 2.4843e-04
Loss = 1.1281e-01, PNorm = 82.3324, GNorm = 0.7220, lr_0 = 2.4826e-04
Loss = 1.0016e-01, PNorm = 82.3349, GNorm = 0.6044, lr_0 = 2.4809e-04
Loss = 9.6998e-02, PNorm = 82.3390, GNorm = 0.7701, lr_0 = 2.4792e-04
Loss = 1.0945e-01, PNorm = 82.3432, GNorm = 0.9563, lr_0 = 2.4775e-04
Loss = 9.7157e-02, PNorm = 82.3442, GNorm = 0.5773, lr_0 = 2.4758e-04
Loss = 8.3389e-02, PNorm = 82.3460, GNorm = 0.7054, lr_0 = 2.4741e-04
Loss = 1.1001e-01, PNorm = 82.3514, GNorm = 0.6379, lr_0 = 2.4724e-04
Loss = 1.1574e-01, PNorm = 82.3537, GNorm = 0.9415, lr_0 = 2.4707e-04
Validation mae = 0.231214
Epoch 19
Loss = 8.8093e-02, PNorm = 82.3592, GNorm = 0.5499, lr_0 = 2.4690e-04
Loss = 8.4452e-02, PNorm = 82.3634, GNorm = 0.6918, lr_0 = 2.4674e-04
Loss = 1.0429e-01, PNorm = 82.3709, GNorm = 0.6263, lr_0 = 2.4657e-04
Loss = 8.9316e-02, PNorm = 82.3783, GNorm = 0.5813, lr_0 = 2.4640e-04
Loss = 9.0921e-02, PNorm = 82.3871, GNorm = 0.7021, lr_0 = 2.4623e-04
Loss = 8.3025e-02, PNorm = 82.3931, GNorm = 0.5562, lr_0 = 2.4606e-04
Loss = 8.5878e-02, PNorm = 82.4003, GNorm = 0.4488, lr_0 = 2.4589e-04
Loss = 1.0334e-01, PNorm = 82.4055, GNorm = 0.9842, lr_0 = 2.4572e-04
Loss = 8.7480e-02, PNorm = 82.4085, GNorm = 0.7395, lr_0 = 2.4556e-04
Loss = 9.3472e-02, PNorm = 82.4093, GNorm = 0.7182, lr_0 = 2.4539e-04
Loss = 1.0314e-01, PNorm = 82.4122, GNorm = 0.7403, lr_0 = 2.4522e-04
Loss = 9.9276e-02, PNorm = 82.4167, GNorm = 0.6596, lr_0 = 2.4505e-04
Loss = 9.2549e-02, PNorm = 82.4229, GNorm = 0.4535, lr_0 = 2.4488e-04
Loss = 1.0005e-01, PNorm = 82.4293, GNorm = 0.6105, lr_0 = 2.4472e-04
Loss = 8.4069e-02, PNorm = 82.4332, GNorm = 0.6110, lr_0 = 2.4455e-04
Loss = 9.3484e-02, PNorm = 82.4359, GNorm = 0.6152, lr_0 = 2.4438e-04
Loss = 8.9291e-02, PNorm = 82.4408, GNorm = 0.6689, lr_0 = 2.4421e-04
Loss = 8.6301e-02, PNorm = 82.4487, GNorm = 0.7880, lr_0 = 2.4405e-04
Loss = 9.7696e-02, PNorm = 82.4560, GNorm = 0.6906, lr_0 = 2.4388e-04
Loss = 1.0002e-01, PNorm = 82.4589, GNorm = 0.6061, lr_0 = 2.4371e-04
Loss = 9.0991e-02, PNorm = 82.4663, GNorm = 1.0075, lr_0 = 2.4354e-04
Loss = 8.8506e-02, PNorm = 82.4721, GNorm = 0.8428, lr_0 = 2.4338e-04
Loss = 9.2949e-02, PNorm = 82.4781, GNorm = 0.7426, lr_0 = 2.4321e-04
Loss = 9.2322e-02, PNorm = 82.4811, GNorm = 0.6363, lr_0 = 2.4304e-04
Loss = 8.6153e-02, PNorm = 82.4868, GNorm = 0.6856, lr_0 = 2.4288e-04
Loss = 1.0565e-01, PNorm = 82.4925, GNorm = 0.8278, lr_0 = 2.4271e-04
Loss = 1.0069e-01, PNorm = 82.4965, GNorm = 0.6488, lr_0 = 2.4254e-04
Loss = 8.7164e-02, PNorm = 82.5003, GNorm = 0.5739, lr_0 = 2.4238e-04
Loss = 9.0091e-02, PNorm = 82.5050, GNorm = 0.5766, lr_0 = 2.4221e-04
Loss = 9.3450e-02, PNorm = 82.5084, GNorm = 0.9164, lr_0 = 2.4205e-04
Loss = 9.5632e-02, PNorm = 82.5140, GNorm = 0.5235, lr_0 = 2.4188e-04
Loss = 8.6606e-02, PNorm = 82.5166, GNorm = 0.6310, lr_0 = 2.4171e-04
Loss = 9.1249e-02, PNorm = 82.5206, GNorm = 0.5116, lr_0 = 2.4155e-04
Loss = 9.5405e-02, PNorm = 82.5225, GNorm = 0.8073, lr_0 = 2.4138e-04
Loss = 9.2894e-02, PNorm = 82.5265, GNorm = 0.5828, lr_0 = 2.4122e-04
Loss = 1.0319e-01, PNorm = 82.5281, GNorm = 0.5998, lr_0 = 2.4105e-04
Loss = 8.9229e-02, PNorm = 82.5314, GNorm = 0.4397, lr_0 = 2.4089e-04
Loss = 9.1482e-02, PNorm = 82.5377, GNorm = 0.6702, lr_0 = 2.4072e-04
Loss = 9.5075e-02, PNorm = 82.5423, GNorm = 0.7553, lr_0 = 2.4056e-04
Loss = 9.3935e-02, PNorm = 82.5450, GNorm = 0.6335, lr_0 = 2.4039e-04
Loss = 1.0593e-01, PNorm = 82.5501, GNorm = 0.8258, lr_0 = 2.4023e-04
Loss = 9.5502e-02, PNorm = 82.5544, GNorm = 0.5954, lr_0 = 2.4006e-04
Loss = 9.7836e-02, PNorm = 82.5600, GNorm = 0.5236, lr_0 = 2.3990e-04
Loss = 9.4378e-02, PNorm = 82.5670, GNorm = 0.7900, lr_0 = 2.3974e-04
Loss = 1.0757e-01, PNorm = 82.5686, GNorm = 1.0299, lr_0 = 2.3957e-04
Loss = 7.6958e-02, PNorm = 82.5732, GNorm = 0.5393, lr_0 = 2.3941e-04
Loss = 9.3984e-02, PNorm = 82.5797, GNorm = 0.6049, lr_0 = 2.3924e-04
Loss = 9.3688e-02, PNorm = 82.5827, GNorm = 0.9112, lr_0 = 2.3908e-04
Loss = 9.6552e-02, PNorm = 82.5932, GNorm = 0.4311, lr_0 = 2.3892e-04
Loss = 9.2422e-02, PNorm = 82.5994, GNorm = 0.6317, lr_0 = 2.3875e-04
Loss = 1.0759e-01, PNorm = 82.6045, GNorm = 0.6337, lr_0 = 2.3859e-04
Loss = 1.0065e-01, PNorm = 82.6101, GNorm = 1.1747, lr_0 = 2.3842e-04
Loss = 9.2739e-02, PNorm = 82.6123, GNorm = 0.7316, lr_0 = 2.3826e-04
Loss = 9.5101e-02, PNorm = 82.6115, GNorm = 0.4310, lr_0 = 2.3810e-04
Loss = 9.4609e-02, PNorm = 82.6133, GNorm = 0.5773, lr_0 = 2.3794e-04
Loss = 8.8583e-02, PNorm = 82.6144, GNorm = 0.6505, lr_0 = 2.3777e-04
Loss = 9.3597e-02, PNorm = 82.6149, GNorm = 0.7954, lr_0 = 2.3761e-04
Loss = 1.0683e-01, PNorm = 82.6188, GNorm = 0.6575, lr_0 = 2.3745e-04
Loss = 8.8672e-02, PNorm = 82.6208, GNorm = 0.7188, lr_0 = 2.3728e-04
Loss = 1.1140e-01, PNorm = 82.6273, GNorm = 0.7513, lr_0 = 2.3712e-04
Loss = 1.0601e-01, PNorm = 82.6347, GNorm = 0.8857, lr_0 = 2.3696e-04
Loss = 9.8095e-02, PNorm = 82.6408, GNorm = 0.7653, lr_0 = 2.3680e-04
Loss = 1.0419e-01, PNorm = 82.6459, GNorm = 0.6282, lr_0 = 2.3663e-04
Loss = 8.6678e-02, PNorm = 82.6517, GNorm = 0.5903, lr_0 = 2.3647e-04
Loss = 8.5339e-02, PNorm = 82.6566, GNorm = 0.6214, lr_0 = 2.3631e-04
Loss = 8.7438e-02, PNorm = 82.6642, GNorm = 0.8088, lr_0 = 2.3615e-04
Loss = 9.8332e-02, PNorm = 82.6692, GNorm = 0.6800, lr_0 = 2.3599e-04
Loss = 9.5660e-02, PNorm = 82.6735, GNorm = 0.5774, lr_0 = 2.3582e-04
Loss = 1.0186e-01, PNorm = 82.6797, GNorm = 0.6944, lr_0 = 2.3566e-04
Loss = 9.0332e-02, PNorm = 82.6849, GNorm = 0.6483, lr_0 = 2.3550e-04
Loss = 9.4711e-02, PNorm = 82.6882, GNorm = 0.6923, lr_0 = 2.3534e-04
Loss = 1.0344e-01, PNorm = 82.6899, GNorm = 0.7045, lr_0 = 2.3518e-04
Loss = 8.5452e-02, PNorm = 82.6931, GNorm = 0.4523, lr_0 = 2.3502e-04
Loss = 1.0286e-01, PNorm = 82.6989, GNorm = 0.6230, lr_0 = 2.3486e-04
Loss = 9.4377e-02, PNorm = 82.7050, GNorm = 0.7827, lr_0 = 2.3470e-04
Loss = 1.0749e-01, PNorm = 82.7081, GNorm = 0.7900, lr_0 = 2.3454e-04
Loss = 9.7079e-02, PNorm = 82.7105, GNorm = 0.8554, lr_0 = 2.3437e-04
Loss = 9.6479e-02, PNorm = 82.7138, GNorm = 0.5599, lr_0 = 2.3421e-04
Loss = 9.1422e-02, PNorm = 82.7179, GNorm = 0.7785, lr_0 = 2.3405e-04
Loss = 8.9784e-02, PNorm = 82.7211, GNorm = 0.6337, lr_0 = 2.3389e-04
Loss = 1.0920e-01, PNorm = 82.7243, GNorm = 1.0591, lr_0 = 2.3373e-04
Loss = 1.0481e-01, PNorm = 82.7293, GNorm = 0.5234, lr_0 = 2.3357e-04
Loss = 9.2362e-02, PNorm = 82.7341, GNorm = 0.5648, lr_0 = 2.3341e-04
Loss = 1.1063e-01, PNorm = 82.7402, GNorm = 0.8139, lr_0 = 2.3325e-04
Loss = 8.7530e-02, PNorm = 82.7459, GNorm = 0.5928, lr_0 = 2.3309e-04
Loss = 9.1226e-02, PNorm = 82.7477, GNorm = 0.6184, lr_0 = 2.3293e-04
Loss = 9.7923e-02, PNorm = 82.7524, GNorm = 0.8427, lr_0 = 2.3277e-04
Loss = 8.6492e-02, PNorm = 82.7548, GNorm = 0.8750, lr_0 = 2.3261e-04
Loss = 8.1376e-02, PNorm = 82.7576, GNorm = 0.6286, lr_0 = 2.3246e-04
Loss = 8.9522e-02, PNorm = 82.7618, GNorm = 0.5173, lr_0 = 2.3230e-04
Loss = 9.3939e-02, PNorm = 82.7663, GNorm = 0.5841, lr_0 = 2.3214e-04
Loss = 9.3761e-02, PNorm = 82.7668, GNorm = 0.7108, lr_0 = 2.3198e-04
Loss = 8.7969e-02, PNorm = 82.7704, GNorm = 0.7256, lr_0 = 2.3182e-04
Loss = 9.9243e-02, PNorm = 82.7776, GNorm = 0.6137, lr_0 = 2.3166e-04
Loss = 1.1024e-01, PNorm = 82.7870, GNorm = 0.6980, lr_0 = 2.3150e-04
Loss = 9.1269e-02, PNorm = 82.7937, GNorm = 0.6901, lr_0 = 2.3134e-04
Loss = 9.6903e-02, PNorm = 82.7966, GNorm = 0.6144, lr_0 = 2.3118e-04
Loss = 8.9395e-02, PNorm = 82.7997, GNorm = 0.7849, lr_0 = 2.3103e-04
Loss = 8.4152e-02, PNorm = 82.8023, GNorm = 0.5309, lr_0 = 2.3087e-04
Loss = 9.7307e-02, PNorm = 82.8065, GNorm = 0.6358, lr_0 = 2.3071e-04
Loss = 1.0208e-01, PNorm = 82.8113, GNorm = 0.6970, lr_0 = 2.3055e-04
Loss = 8.9942e-02, PNorm = 82.8176, GNorm = 0.4960, lr_0 = 2.3039e-04
Loss = 9.6734e-02, PNorm = 82.8267, GNorm = 0.6024, lr_0 = 2.3024e-04
Loss = 8.4533e-02, PNorm = 82.8323, GNorm = 0.6387, lr_0 = 2.3008e-04
Loss = 8.8013e-02, PNorm = 82.8333, GNorm = 0.5487, lr_0 = 2.2992e-04
Loss = 8.9538e-02, PNorm = 82.8352, GNorm = 0.4560, lr_0 = 2.2976e-04
Loss = 9.1965e-02, PNorm = 82.8382, GNorm = 0.8845, lr_0 = 2.2961e-04
Loss = 1.2115e-01, PNorm = 82.8441, GNorm = 0.6452, lr_0 = 2.2945e-04
Loss = 1.0342e-01, PNorm = 82.8468, GNorm = 0.6291, lr_0 = 2.2929e-04
Loss = 8.5202e-02, PNorm = 82.8513, GNorm = 0.4942, lr_0 = 2.2913e-04
Loss = 1.0407e-01, PNorm = 82.8544, GNorm = 0.6406, lr_0 = 2.2898e-04
Loss = 1.0483e-01, PNorm = 82.8577, GNorm = 0.5122, lr_0 = 2.2882e-04
Loss = 9.9178e-02, PNorm = 82.8609, GNorm = 0.8886, lr_0 = 2.2866e-04
Loss = 9.4118e-02, PNorm = 82.8603, GNorm = 0.7921, lr_0 = 2.2851e-04
Loss = 9.5303e-02, PNorm = 82.8635, GNorm = 0.6736, lr_0 = 2.2835e-04
Loss = 9.5642e-02, PNorm = 82.8631, GNorm = 0.6272, lr_0 = 2.2819e-04
Loss = 9.4678e-02, PNorm = 82.8623, GNorm = 0.7174, lr_0 = 2.2804e-04
Loss = 1.0647e-01, PNorm = 82.8687, GNorm = 0.6878, lr_0 = 2.2788e-04
Loss = 8.8958e-02, PNorm = 82.8742, GNorm = 0.6283, lr_0 = 2.2773e-04
Loss = 1.0305e-01, PNorm = 82.8789, GNorm = 0.7701, lr_0 = 2.2757e-04
Validation mae = 0.228777
Epoch 20
Loss = 8.3658e-02, PNorm = 82.8853, GNorm = 0.6081, lr_0 = 2.2741e-04
Loss = 9.5744e-02, PNorm = 82.8908, GNorm = 0.6808, lr_0 = 2.2726e-04
Loss = 9.3704e-02, PNorm = 82.8958, GNorm = 0.5615, lr_0 = 2.2710e-04
Loss = 8.9347e-02, PNorm = 82.9000, GNorm = 0.5580, lr_0 = 2.2695e-04
Loss = 8.3732e-02, PNorm = 82.9047, GNorm = 1.1285, lr_0 = 2.2679e-04
Loss = 7.6124e-02, PNorm = 82.9106, GNorm = 0.6386, lr_0 = 2.2664e-04
Loss = 9.0457e-02, PNorm = 82.9164, GNorm = 0.5625, lr_0 = 2.2648e-04
Loss = 7.6392e-02, PNorm = 82.9179, GNorm = 0.6935, lr_0 = 2.2632e-04
Loss = 8.2329e-02, PNorm = 82.9200, GNorm = 0.5203, lr_0 = 2.2617e-04
Loss = 8.7135e-02, PNorm = 82.9235, GNorm = 0.6796, lr_0 = 2.2601e-04
Loss = 7.6430e-02, PNorm = 82.9242, GNorm = 0.5424, lr_0 = 2.2586e-04
Loss = 8.4957e-02, PNorm = 82.9261, GNorm = 0.5824, lr_0 = 2.2571e-04
Loss = 9.4119e-02, PNorm = 82.9310, GNorm = 0.6019, lr_0 = 2.2555e-04
Loss = 9.6914e-02, PNorm = 82.9342, GNorm = 0.7512, lr_0 = 2.2540e-04
Loss = 1.0801e-01, PNorm = 82.9383, GNorm = 0.6509, lr_0 = 2.2524e-04
Loss = 9.5612e-02, PNorm = 82.9418, GNorm = 0.7353, lr_0 = 2.2509e-04
Loss = 8.3153e-02, PNorm = 82.9463, GNorm = 0.5661, lr_0 = 2.2493e-04
Loss = 8.6814e-02, PNorm = 82.9524, GNorm = 0.6983, lr_0 = 2.2478e-04
Loss = 9.7695e-02, PNorm = 82.9562, GNorm = 0.5698, lr_0 = 2.2463e-04
Loss = 8.3526e-02, PNorm = 82.9595, GNorm = 0.6283, lr_0 = 2.2447e-04
Loss = 8.2783e-02, PNorm = 82.9641, GNorm = 0.5719, lr_0 = 2.2432e-04
Loss = 8.8791e-02, PNorm = 82.9722, GNorm = 0.6854, lr_0 = 2.2416e-04
Loss = 8.6485e-02, PNorm = 82.9781, GNorm = 0.8481, lr_0 = 2.2401e-04
Loss = 8.9669e-02, PNorm = 82.9820, GNorm = 0.7913, lr_0 = 2.2386e-04
Loss = 8.3268e-02, PNorm = 82.9864, GNorm = 0.6708, lr_0 = 2.2370e-04
Loss = 1.0517e-01, PNorm = 82.9861, GNorm = 0.5305, lr_0 = 2.2355e-04
Loss = 8.4693e-02, PNorm = 82.9869, GNorm = 0.6131, lr_0 = 2.2340e-04
Loss = 8.6028e-02, PNorm = 82.9901, GNorm = 0.7759, lr_0 = 2.2324e-04
Loss = 8.9662e-02, PNorm = 82.9964, GNorm = 0.6055, lr_0 = 2.2309e-04
Loss = 9.3140e-02, PNorm = 83.0032, GNorm = 0.5494, lr_0 = 2.2294e-04
Loss = 8.2317e-02, PNorm = 83.0069, GNorm = 0.7518, lr_0 = 2.2279e-04
Loss = 8.8543e-02, PNorm = 83.0119, GNorm = 0.6761, lr_0 = 2.2263e-04
Loss = 9.5969e-02, PNorm = 83.0134, GNorm = 0.6637, lr_0 = 2.2248e-04
Loss = 7.7539e-02, PNorm = 83.0190, GNorm = 0.5089, lr_0 = 2.2233e-04
Loss = 9.4366e-02, PNorm = 83.0236, GNorm = 0.7347, lr_0 = 2.2218e-04
Loss = 9.6046e-02, PNorm = 83.0279, GNorm = 0.5193, lr_0 = 2.2202e-04
Loss = 9.5744e-02, PNorm = 83.0329, GNorm = 1.0244, lr_0 = 2.2187e-04
Loss = 9.2593e-02, PNorm = 83.0389, GNorm = 0.5369, lr_0 = 2.2172e-04
Loss = 9.4829e-02, PNorm = 83.0458, GNorm = 0.5716, lr_0 = 2.2157e-04
Loss = 9.5458e-02, PNorm = 83.0513, GNorm = 0.6716, lr_0 = 2.2142e-04
Loss = 8.9581e-02, PNorm = 83.0543, GNorm = 0.6835, lr_0 = 2.2126e-04
Loss = 9.5340e-02, PNorm = 83.0582, GNorm = 0.6020, lr_0 = 2.2111e-04
Loss = 1.0006e-01, PNorm = 83.0628, GNorm = 0.9572, lr_0 = 2.2096e-04
Loss = 9.0322e-02, PNorm = 83.0673, GNorm = 0.7065, lr_0 = 2.2081e-04
Loss = 9.5465e-02, PNorm = 83.0678, GNorm = 0.5450, lr_0 = 2.2066e-04
Loss = 9.3851e-02, PNorm = 83.0743, GNorm = 0.5193, lr_0 = 2.2051e-04
Loss = 8.8637e-02, PNorm = 83.0822, GNorm = 0.8855, lr_0 = 2.2036e-04
Loss = 1.0464e-01, PNorm = 83.0888, GNorm = 0.5586, lr_0 = 2.2021e-04
Loss = 1.0162e-01, PNorm = 83.0954, GNorm = 0.7046, lr_0 = 2.2005e-04
Loss = 8.9078e-02, PNorm = 83.0987, GNorm = 0.6846, lr_0 = 2.1990e-04
Loss = 9.4289e-02, PNorm = 83.1022, GNorm = 0.6494, lr_0 = 2.1975e-04
Loss = 9.6827e-02, PNorm = 83.1079, GNorm = 0.6148, lr_0 = 2.1960e-04
Loss = 8.8273e-02, PNorm = 83.1100, GNorm = 0.5577, lr_0 = 2.1945e-04
Loss = 8.3686e-02, PNorm = 83.1092, GNorm = 0.4476, lr_0 = 2.1930e-04
Loss = 9.8504e-02, PNorm = 83.1136, GNorm = 0.6926, lr_0 = 2.1915e-04
Loss = 8.6055e-02, PNorm = 83.1184, GNorm = 0.5062, lr_0 = 2.1900e-04
Loss = 1.0607e-01, PNorm = 83.1229, GNorm = 0.6178, lr_0 = 2.1885e-04
Loss = 9.1423e-02, PNorm = 83.1281, GNorm = 0.6334, lr_0 = 2.1870e-04
Loss = 8.0340e-02, PNorm = 83.1315, GNorm = 0.6596, lr_0 = 2.1855e-04
Loss = 9.3172e-02, PNorm = 83.1337, GNorm = 0.6199, lr_0 = 2.1840e-04
Loss = 9.2462e-02, PNorm = 83.1364, GNorm = 0.6561, lr_0 = 2.1825e-04
Loss = 1.0408e-01, PNorm = 83.1434, GNorm = 0.6502, lr_0 = 2.1810e-04
Loss = 7.8596e-02, PNorm = 83.1499, GNorm = 0.6576, lr_0 = 2.1795e-04
Loss = 1.0185e-01, PNorm = 83.1565, GNorm = 0.6370, lr_0 = 2.1780e-04
Loss = 1.0246e-01, PNorm = 83.1620, GNorm = 0.7095, lr_0 = 2.1765e-04
Loss = 9.9257e-02, PNorm = 83.1662, GNorm = 0.5910, lr_0 = 2.1751e-04
Loss = 9.1236e-02, PNorm = 83.1694, GNorm = 0.6995, lr_0 = 2.1736e-04
Loss = 1.1095e-01, PNorm = 83.1707, GNorm = 1.0112, lr_0 = 2.1721e-04
Loss = 9.8963e-02, PNorm = 83.1725, GNorm = 0.7479, lr_0 = 2.1706e-04
Loss = 8.5137e-02, PNorm = 83.1736, GNorm = 0.6209, lr_0 = 2.1691e-04
Loss = 9.1153e-02, PNorm = 83.1749, GNorm = 0.6543, lr_0 = 2.1676e-04
Loss = 9.5795e-02, PNorm = 83.1780, GNorm = 0.6600, lr_0 = 2.1661e-04
Loss = 8.8205e-02, PNorm = 83.1818, GNorm = 0.6343, lr_0 = 2.1646e-04
Loss = 9.6729e-02, PNorm = 83.1890, GNorm = 0.8739, lr_0 = 2.1632e-04
Loss = 8.5632e-02, PNorm = 83.1949, GNorm = 0.4693, lr_0 = 2.1617e-04
Loss = 1.0661e-01, PNorm = 83.1985, GNorm = 0.4543, lr_0 = 2.1602e-04
Loss = 8.8112e-02, PNorm = 83.2000, GNorm = 0.6198, lr_0 = 2.1587e-04
Loss = 8.3403e-02, PNorm = 83.2031, GNorm = 0.5884, lr_0 = 2.1572e-04
Loss = 8.6558e-02, PNorm = 83.2061, GNorm = 0.8900, lr_0 = 2.1558e-04
Loss = 1.2142e-01, PNorm = 83.2110, GNorm = 0.7467, lr_0 = 2.1543e-04
Loss = 8.6646e-02, PNorm = 83.2186, GNorm = 0.6671, lr_0 = 2.1528e-04
Loss = 8.2358e-02, PNorm = 83.2257, GNorm = 0.6295, lr_0 = 2.1513e-04
Loss = 9.4278e-02, PNorm = 83.2292, GNorm = 0.8013, lr_0 = 2.1499e-04
Loss = 9.1584e-02, PNorm = 83.2345, GNorm = 0.5595, lr_0 = 2.1484e-04
Loss = 9.3907e-02, PNorm = 83.2424, GNorm = 0.5636, lr_0 = 2.1469e-04
Loss = 1.0747e-01, PNorm = 83.2420, GNorm = 0.6671, lr_0 = 2.1454e-04
Loss = 1.0255e-01, PNorm = 83.2437, GNorm = 0.8382, lr_0 = 2.1440e-04
Loss = 1.0322e-01, PNorm = 83.2458, GNorm = 0.9759, lr_0 = 2.1425e-04
Loss = 9.5268e-02, PNorm = 83.2514, GNorm = 0.7331, lr_0 = 2.1410e-04
Loss = 7.3761e-02, PNorm = 83.2565, GNorm = 0.7583, lr_0 = 2.1396e-04
Loss = 8.8207e-02, PNorm = 83.2602, GNorm = 0.6088, lr_0 = 2.1381e-04
Loss = 1.0083e-01, PNorm = 83.2678, GNorm = 1.0946, lr_0 = 2.1366e-04
Loss = 9.3691e-02, PNorm = 83.2739, GNorm = 0.6057, lr_0 = 2.1352e-04
Loss = 9.4466e-02, PNorm = 83.2805, GNorm = 0.7282, lr_0 = 2.1337e-04
Loss = 8.1940e-02, PNorm = 83.2845, GNorm = 0.5996, lr_0 = 2.1323e-04
Loss = 9.9022e-02, PNorm = 83.2889, GNorm = 0.6863, lr_0 = 2.1308e-04
Loss = 8.9176e-02, PNorm = 83.2923, GNorm = 0.6167, lr_0 = 2.1293e-04
Loss = 9.0448e-02, PNorm = 83.2959, GNorm = 0.7062, lr_0 = 2.1279e-04
Loss = 9.4174e-02, PNorm = 83.2996, GNorm = 0.5038, lr_0 = 2.1264e-04
Loss = 9.0942e-02, PNorm = 83.3026, GNorm = 0.5627, lr_0 = 2.1250e-04
Loss = 8.8637e-02, PNorm = 83.3083, GNorm = 1.0282, lr_0 = 2.1235e-04
Loss = 9.6828e-02, PNorm = 83.3099, GNorm = 0.7543, lr_0 = 2.1221e-04
Loss = 9.8016e-02, PNorm = 83.3138, GNorm = 0.6568, lr_0 = 2.1206e-04
Loss = 8.7825e-02, PNorm = 83.3163, GNorm = 0.6646, lr_0 = 2.1191e-04
Loss = 8.6994e-02, PNorm = 83.3210, GNorm = 0.7250, lr_0 = 2.1177e-04
Loss = 9.5021e-02, PNorm = 83.3274, GNorm = 0.8294, lr_0 = 2.1162e-04
Loss = 9.0475e-02, PNorm = 83.3310, GNorm = 0.7585, lr_0 = 2.1148e-04
Loss = 9.8947e-02, PNorm = 83.3356, GNorm = 0.6574, lr_0 = 2.1133e-04
Loss = 9.7633e-02, PNorm = 83.3384, GNorm = 0.9217, lr_0 = 2.1119e-04
Loss = 8.5698e-02, PNorm = 83.3409, GNorm = 0.6374, lr_0 = 2.1104e-04
Loss = 1.0409e-01, PNorm = 83.3415, GNorm = 0.5561, lr_0 = 2.1090e-04
Loss = 8.4764e-02, PNorm = 83.3452, GNorm = 0.5128, lr_0 = 2.1076e-04
Loss = 9.7816e-02, PNorm = 83.3494, GNorm = 0.7988, lr_0 = 2.1061e-04
Loss = 8.1640e-02, PNorm = 83.3537, GNorm = 0.5639, lr_0 = 2.1047e-04
Loss = 9.7022e-02, PNorm = 83.3568, GNorm = 0.8254, lr_0 = 2.1032e-04
Loss = 8.3820e-02, PNorm = 83.3577, GNorm = 0.5012, lr_0 = 2.1018e-04
Loss = 1.0456e-01, PNorm = 83.3636, GNorm = 1.2301, lr_0 = 2.1003e-04
Loss = 9.7963e-02, PNorm = 83.3695, GNorm = 0.4490, lr_0 = 2.0989e-04
Loss = 9.1034e-02, PNorm = 83.3743, GNorm = 0.6387, lr_0 = 2.0975e-04
Loss = 8.0848e-02, PNorm = 83.3774, GNorm = 0.6049, lr_0 = 2.0960e-04
Validation mae = 0.228325
Epoch 21
Loss = 8.5760e-02, PNorm = 83.3816, GNorm = 0.8579, lr_0 = 2.0946e-04
Loss = 9.3275e-02, PNorm = 83.3840, GNorm = 0.6437, lr_0 = 2.0932e-04
Loss = 8.2047e-02, PNorm = 83.3875, GNorm = 0.6513, lr_0 = 2.0917e-04
Loss = 8.4824e-02, PNorm = 83.3885, GNorm = 0.5785, lr_0 = 2.0903e-04
Loss = 7.8292e-02, PNorm = 83.3921, GNorm = 0.6425, lr_0 = 2.0889e-04
Loss = 8.0440e-02, PNorm = 83.3942, GNorm = 0.4341, lr_0 = 2.0874e-04
Loss = 7.6737e-02, PNorm = 83.4000, GNorm = 0.6792, lr_0 = 2.0860e-04
Loss = 8.1158e-02, PNorm = 83.4036, GNorm = 0.4200, lr_0 = 2.0846e-04
Loss = 9.3736e-02, PNorm = 83.4042, GNorm = 0.5205, lr_0 = 2.0831e-04
Loss = 8.7514e-02, PNorm = 83.4062, GNorm = 0.4603, lr_0 = 2.0817e-04
Loss = 8.5250e-02, PNorm = 83.4110, GNorm = 0.6750, lr_0 = 2.0803e-04
Loss = 8.4784e-02, PNorm = 83.4128, GNorm = 0.5210, lr_0 = 2.0789e-04
Loss = 8.7246e-02, PNorm = 83.4162, GNorm = 0.6626, lr_0 = 2.0774e-04
Loss = 9.0518e-02, PNorm = 83.4194, GNorm = 0.7232, lr_0 = 2.0760e-04
Loss = 7.9507e-02, PNorm = 83.4255, GNorm = 0.7481, lr_0 = 2.0746e-04
Loss = 8.7467e-02, PNorm = 83.4314, GNorm = 0.5453, lr_0 = 2.0732e-04
Loss = 8.6954e-02, PNorm = 83.4354, GNorm = 0.5680, lr_0 = 2.0718e-04
Loss = 7.6425e-02, PNorm = 83.4403, GNorm = 0.6209, lr_0 = 2.0703e-04
Loss = 9.6362e-02, PNorm = 83.4463, GNorm = 0.7333, lr_0 = 2.0689e-04
Loss = 8.4412e-02, PNorm = 83.4488, GNorm = 0.5569, lr_0 = 2.0675e-04
Loss = 7.7411e-02, PNorm = 83.4528, GNorm = 0.4498, lr_0 = 2.0661e-04
Loss = 8.5464e-02, PNorm = 83.4577, GNorm = 0.7717, lr_0 = 2.0647e-04
Loss = 8.3468e-02, PNorm = 83.4594, GNorm = 1.1744, lr_0 = 2.0633e-04
Loss = 8.9028e-02, PNorm = 83.4580, GNorm = 0.6086, lr_0 = 2.0618e-04
Loss = 9.5935e-02, PNorm = 83.4610, GNorm = 0.5300, lr_0 = 2.0604e-04
Loss = 1.0496e-01, PNorm = 83.4676, GNorm = 0.8220, lr_0 = 2.0590e-04
Loss = 8.5430e-02, PNorm = 83.4737, GNorm = 0.5160, lr_0 = 2.0576e-04
Loss = 8.9278e-02, PNorm = 83.4770, GNorm = 0.5669, lr_0 = 2.0562e-04
Loss = 9.5866e-02, PNorm = 83.4771, GNorm = 0.6392, lr_0 = 2.0548e-04
Loss = 8.5414e-02, PNorm = 83.4806, GNorm = 0.5957, lr_0 = 2.0534e-04
Loss = 9.2199e-02, PNorm = 83.4837, GNorm = 0.7390, lr_0 = 2.0520e-04
Loss = 9.5383e-02, PNorm = 83.4899, GNorm = 0.4897, lr_0 = 2.0506e-04
Loss = 8.3146e-02, PNorm = 83.4944, GNorm = 0.7452, lr_0 = 2.0492e-04
Loss = 9.2330e-02, PNorm = 83.4986, GNorm = 0.7804, lr_0 = 2.0478e-04
Loss = 9.1407e-02, PNorm = 83.5042, GNorm = 0.6223, lr_0 = 2.0464e-04
Loss = 8.0839e-02, PNorm = 83.5075, GNorm = 0.5869, lr_0 = 2.0450e-04
Loss = 8.8775e-02, PNorm = 83.5095, GNorm = 0.5919, lr_0 = 2.0436e-04
Loss = 8.8885e-02, PNorm = 83.5124, GNorm = 0.5503, lr_0 = 2.0422e-04
Loss = 1.0256e-01, PNorm = 83.5132, GNorm = 0.7191, lr_0 = 2.0408e-04
Loss = 9.3615e-02, PNorm = 83.5173, GNorm = 0.7283, lr_0 = 2.0394e-04
Loss = 9.8271e-02, PNorm = 83.5259, GNorm = 0.5032, lr_0 = 2.0380e-04
Loss = 8.9855e-02, PNorm = 83.5329, GNorm = 0.6113, lr_0 = 2.0366e-04
Loss = 9.5845e-02, PNorm = 83.5366, GNorm = 0.6221, lr_0 = 2.0352e-04
Loss = 8.0391e-02, PNorm = 83.5379, GNorm = 0.4960, lr_0 = 2.0338e-04
Loss = 1.0508e-01, PNorm = 83.5408, GNorm = 0.6837, lr_0 = 2.0324e-04
Loss = 8.8502e-02, PNorm = 83.5467, GNorm = 0.7223, lr_0 = 2.0310e-04
Loss = 8.3682e-02, PNorm = 83.5503, GNorm = 0.6980, lr_0 = 2.0296e-04
Loss = 7.8641e-02, PNorm = 83.5533, GNorm = 0.5372, lr_0 = 2.0282e-04
Loss = 8.9679e-02, PNorm = 83.5567, GNorm = 0.6906, lr_0 = 2.0268e-04
Loss = 9.0374e-02, PNorm = 83.5583, GNorm = 0.5935, lr_0 = 2.0254e-04
Loss = 9.0075e-02, PNorm = 83.5611, GNorm = 0.5088, lr_0 = 2.0240e-04
Loss = 9.8150e-02, PNorm = 83.5619, GNorm = 0.6314, lr_0 = 2.0227e-04
Loss = 8.4819e-02, PNorm = 83.5648, GNorm = 0.5827, lr_0 = 2.0213e-04
Loss = 7.8088e-02, PNorm = 83.5725, GNorm = 0.5144, lr_0 = 2.0199e-04
Loss = 8.8828e-02, PNorm = 83.5784, GNorm = 0.6918, lr_0 = 2.0185e-04
Loss = 8.6805e-02, PNorm = 83.5800, GNorm = 0.6017, lr_0 = 2.0171e-04
Loss = 8.4862e-02, PNorm = 83.5825, GNorm = 0.5890, lr_0 = 2.0157e-04
Loss = 7.9912e-02, PNorm = 83.5860, GNorm = 0.6421, lr_0 = 2.0144e-04
Loss = 8.6052e-02, PNorm = 83.5857, GNorm = 0.7499, lr_0 = 2.0130e-04
Loss = 9.8692e-02, PNorm = 83.5911, GNorm = 0.8136, lr_0 = 2.0116e-04
Loss = 8.1503e-02, PNorm = 83.5961, GNorm = 0.8081, lr_0 = 2.0102e-04
Loss = 9.1075e-02, PNorm = 83.5991, GNorm = 0.6982, lr_0 = 2.0088e-04
Loss = 9.0524e-02, PNorm = 83.6007, GNorm = 0.5711, lr_0 = 2.0075e-04
Loss = 8.0859e-02, PNorm = 83.6015, GNorm = 0.5408, lr_0 = 2.0061e-04
Loss = 9.1809e-02, PNorm = 83.6049, GNorm = 0.5874, lr_0 = 2.0047e-04
Loss = 8.0289e-02, PNorm = 83.6088, GNorm = 0.6459, lr_0 = 2.0033e-04
Loss = 1.0118e-01, PNorm = 83.6105, GNorm = 0.6267, lr_0 = 2.0020e-04
Loss = 8.4649e-02, PNorm = 83.6135, GNorm = 0.7286, lr_0 = 2.0006e-04
Loss = 8.3832e-02, PNorm = 83.6150, GNorm = 0.6854, lr_0 = 1.9992e-04
Loss = 9.5314e-02, PNorm = 83.6151, GNorm = 0.7963, lr_0 = 1.9979e-04
Loss = 8.4382e-02, PNorm = 83.6172, GNorm = 0.5368, lr_0 = 1.9965e-04
Loss = 9.0931e-02, PNorm = 83.6223, GNorm = 0.5972, lr_0 = 1.9951e-04
Loss = 8.5656e-02, PNorm = 83.6243, GNorm = 0.5342, lr_0 = 1.9938e-04
Loss = 8.6128e-02, PNorm = 83.6276, GNorm = 0.6407, lr_0 = 1.9924e-04
Loss = 9.1484e-02, PNorm = 83.6331, GNorm = 0.8849, lr_0 = 1.9910e-04
Loss = 8.0354e-02, PNorm = 83.6368, GNorm = 0.5798, lr_0 = 1.9897e-04
Loss = 8.6610e-02, PNorm = 83.6405, GNorm = 0.6255, lr_0 = 1.9883e-04
Loss = 8.8379e-02, PNorm = 83.6431, GNorm = 0.6748, lr_0 = 1.9869e-04
Loss = 8.8189e-02, PNorm = 83.6456, GNorm = 0.4464, lr_0 = 1.9856e-04
Loss = 9.6532e-02, PNorm = 83.6463, GNorm = 0.6189, lr_0 = 1.9842e-04
Loss = 8.9813e-02, PNorm = 83.6496, GNorm = 0.5792, lr_0 = 1.9829e-04
Loss = 9.0833e-02, PNorm = 83.6538, GNorm = 0.9071, lr_0 = 1.9815e-04
Loss = 9.0595e-02, PNorm = 83.6572, GNorm = 0.7088, lr_0 = 1.9801e-04
Loss = 9.3946e-02, PNorm = 83.6604, GNorm = 0.6254, lr_0 = 1.9788e-04
Loss = 8.3848e-02, PNorm = 83.6616, GNorm = 0.7098, lr_0 = 1.9774e-04
Loss = 9.4896e-02, PNorm = 83.6632, GNorm = 0.7103, lr_0 = 1.9761e-04
Loss = 8.3619e-02, PNorm = 83.6671, GNorm = 0.6582, lr_0 = 1.9747e-04
Loss = 9.3272e-02, PNorm = 83.6732, GNorm = 0.6889, lr_0 = 1.9734e-04
Loss = 9.0157e-02, PNorm = 83.6765, GNorm = 0.6121, lr_0 = 1.9720e-04
Loss = 8.6322e-02, PNorm = 83.6827, GNorm = 0.6339, lr_0 = 1.9707e-04
Loss = 9.1746e-02, PNorm = 83.6869, GNorm = 0.6654, lr_0 = 1.9693e-04
Loss = 9.4223e-02, PNorm = 83.6893, GNorm = 0.5814, lr_0 = 1.9680e-04
Loss = 1.0205e-01, PNorm = 83.6941, GNorm = 0.7258, lr_0 = 1.9666e-04
Loss = 9.4630e-02, PNorm = 83.7006, GNorm = 0.7875, lr_0 = 1.9653e-04
Loss = 8.5969e-02, PNorm = 83.7012, GNorm = 0.6084, lr_0 = 1.9639e-04
Loss = 7.9066e-02, PNorm = 83.7023, GNorm = 0.4646, lr_0 = 1.9626e-04
Loss = 8.6775e-02, PNorm = 83.7045, GNorm = 0.7165, lr_0 = 1.9612e-04
Loss = 9.5971e-02, PNorm = 83.7089, GNorm = 0.7152, lr_0 = 1.9599e-04
Loss = 8.7546e-02, PNorm = 83.7134, GNorm = 0.6289, lr_0 = 1.9585e-04
Loss = 8.6800e-02, PNorm = 83.7170, GNorm = 0.5677, lr_0 = 1.9572e-04
Loss = 9.2430e-02, PNorm = 83.7204, GNorm = 0.8041, lr_0 = 1.9559e-04
Loss = 9.3434e-02, PNorm = 83.7251, GNorm = 0.4959, lr_0 = 1.9545e-04
Loss = 8.2344e-02, PNorm = 83.7287, GNorm = 0.6775, lr_0 = 1.9532e-04
Loss = 8.7714e-02, PNorm = 83.7342, GNorm = 0.6218, lr_0 = 1.9518e-04
Loss = 9.5779e-02, PNorm = 83.7372, GNorm = 0.7618, lr_0 = 1.9505e-04
Loss = 9.3951e-02, PNorm = 83.7411, GNorm = 0.4902, lr_0 = 1.9492e-04
Loss = 8.8802e-02, PNorm = 83.7445, GNorm = 0.8244, lr_0 = 1.9478e-04
Loss = 9.8161e-02, PNorm = 83.7472, GNorm = 0.5742, lr_0 = 1.9465e-04
Loss = 9.2991e-02, PNorm = 83.7509, GNorm = 0.7358, lr_0 = 1.9452e-04
Loss = 1.0208e-01, PNorm = 83.7549, GNorm = 0.6769, lr_0 = 1.9438e-04
Loss = 9.0405e-02, PNorm = 83.7572, GNorm = 0.5492, lr_0 = 1.9425e-04
Loss = 9.0060e-02, PNorm = 83.7600, GNorm = 0.5559, lr_0 = 1.9412e-04
Loss = 9.8354e-02, PNorm = 83.7623, GNorm = 0.6355, lr_0 = 1.9398e-04
Loss = 9.9491e-02, PNorm = 83.7669, GNorm = 0.5561, lr_0 = 1.9385e-04
Loss = 9.9677e-02, PNorm = 83.7708, GNorm = 0.7363, lr_0 = 1.9372e-04
Loss = 9.2629e-02, PNorm = 83.7731, GNorm = 0.6326, lr_0 = 1.9359e-04
Loss = 8.7610e-02, PNorm = 83.7744, GNorm = 0.5979, lr_0 = 1.9345e-04
Loss = 9.7314e-02, PNorm = 83.7767, GNorm = 0.8163, lr_0 = 1.9332e-04
Loss = 1.0067e-01, PNorm = 83.7809, GNorm = 0.5403, lr_0 = 1.9319e-04
Loss = 9.9847e-02, PNorm = 83.7840, GNorm = 0.6735, lr_0 = 1.9306e-04
Validation mae = 0.228902
Epoch 22
Loss = 8.0317e-02, PNorm = 83.7878, GNorm = 0.6712, lr_0 = 1.9292e-04
Loss = 7.2518e-02, PNorm = 83.7927, GNorm = 0.5985, lr_0 = 1.9279e-04
Loss = 9.4193e-02, PNorm = 83.7969, GNorm = 0.6713, lr_0 = 1.9266e-04
Loss = 8.5151e-02, PNorm = 83.7988, GNorm = 0.7803, lr_0 = 1.9253e-04
Loss = 7.5700e-02, PNorm = 83.8010, GNorm = 0.6443, lr_0 = 1.9240e-04
Loss = 8.6456e-02, PNorm = 83.8048, GNorm = 0.5953, lr_0 = 1.9226e-04
Loss = 6.8569e-02, PNorm = 83.8116, GNorm = 0.5998, lr_0 = 1.9213e-04
Loss = 9.0580e-02, PNorm = 83.8152, GNorm = 0.5079, lr_0 = 1.9200e-04
Loss = 8.4835e-02, PNorm = 83.8164, GNorm = 0.5486, lr_0 = 1.9187e-04
Loss = 8.0898e-02, PNorm = 83.8232, GNorm = 0.6135, lr_0 = 1.9174e-04
Loss = 8.9850e-02, PNorm = 83.8274, GNorm = 0.6600, lr_0 = 1.9161e-04
Loss = 6.9951e-02, PNorm = 83.8288, GNorm = 0.4402, lr_0 = 1.9148e-04
Loss = 8.5377e-02, PNorm = 83.8321, GNorm = 0.5734, lr_0 = 1.9134e-04
Loss = 8.4820e-02, PNorm = 83.8354, GNorm = 0.6502, lr_0 = 1.9121e-04
Loss = 8.4854e-02, PNorm = 83.8375, GNorm = 0.6344, lr_0 = 1.9108e-04
Loss = 8.8135e-02, PNorm = 83.8378, GNorm = 0.5555, lr_0 = 1.9095e-04
Loss = 9.0591e-02, PNorm = 83.8403, GNorm = 0.7218, lr_0 = 1.9082e-04
Loss = 9.1953e-02, PNorm = 83.8430, GNorm = 0.6977, lr_0 = 1.9069e-04
Loss = 8.5611e-02, PNorm = 83.8464, GNorm = 0.6940, lr_0 = 1.9056e-04
Loss = 9.4112e-02, PNorm = 83.8501, GNorm = 0.7480, lr_0 = 1.9043e-04
Loss = 7.9433e-02, PNorm = 83.8528, GNorm = 0.6288, lr_0 = 1.9030e-04
Loss = 1.0581e-01, PNorm = 83.8579, GNorm = 0.8653, lr_0 = 1.9017e-04
Loss = 9.8116e-02, PNorm = 83.8615, GNorm = 0.6293, lr_0 = 1.9004e-04
Loss = 8.8984e-02, PNorm = 83.8639, GNorm = 0.8150, lr_0 = 1.8991e-04
Loss = 9.3533e-02, PNorm = 83.8676, GNorm = 0.5398, lr_0 = 1.8978e-04
Loss = 7.9827e-02, PNorm = 83.8699, GNorm = 0.5227, lr_0 = 1.8965e-04
Loss = 7.8200e-02, PNorm = 83.8727, GNorm = 0.5017, lr_0 = 1.8952e-04
Loss = 7.5873e-02, PNorm = 83.8757, GNorm = 0.7123, lr_0 = 1.8939e-04
Loss = 9.0553e-02, PNorm = 83.8816, GNorm = 0.6289, lr_0 = 1.8926e-04
Loss = 8.7085e-02, PNorm = 83.8854, GNorm = 0.8188, lr_0 = 1.8913e-04
Loss = 9.6351e-02, PNorm = 83.8866, GNorm = 0.6276, lr_0 = 1.8900e-04
Loss = 8.7833e-02, PNorm = 83.8918, GNorm = 0.7474, lr_0 = 1.8887e-04
Loss = 9.3502e-02, PNorm = 83.8945, GNorm = 0.6699, lr_0 = 1.8874e-04
Loss = 8.6999e-02, PNorm = 83.8947, GNorm = 0.7163, lr_0 = 1.8861e-04
Loss = 8.8630e-02, PNorm = 83.8987, GNorm = 0.6277, lr_0 = 1.8848e-04
Loss = 7.8203e-02, PNorm = 83.9027, GNorm = 0.6958, lr_0 = 1.8835e-04
Loss = 8.7342e-02, PNorm = 83.9034, GNorm = 0.7636, lr_0 = 1.8822e-04
Loss = 8.8469e-02, PNorm = 83.9059, GNorm = 0.8185, lr_0 = 1.8809e-04
Loss = 9.6933e-02, PNorm = 83.9093, GNorm = 0.6646, lr_0 = 1.8797e-04
Loss = 7.6940e-02, PNorm = 83.9129, GNorm = 0.7186, lr_0 = 1.8784e-04
Loss = 8.6678e-02, PNorm = 83.9182, GNorm = 0.6696, lr_0 = 1.8771e-04
Loss = 1.0887e-01, PNorm = 83.9191, GNorm = 0.6843, lr_0 = 1.8758e-04
Loss = 8.2151e-02, PNorm = 83.9224, GNorm = 0.4976, lr_0 = 1.8745e-04
Loss = 9.0132e-02, PNorm = 83.9282, GNorm = 0.5922, lr_0 = 1.8732e-04
Loss = 9.0134e-02, PNorm = 83.9335, GNorm = 0.5734, lr_0 = 1.8719e-04
Loss = 8.4494e-02, PNorm = 83.9379, GNorm = 0.5373, lr_0 = 1.8707e-04
Loss = 9.4887e-02, PNorm = 83.9428, GNorm = 0.6088, lr_0 = 1.8694e-04
Loss = 1.0629e-01, PNorm = 83.9466, GNorm = 0.6513, lr_0 = 1.8681e-04
Loss = 9.1617e-02, PNorm = 83.9464, GNorm = 0.5831, lr_0 = 1.8668e-04
Loss = 9.3995e-02, PNorm = 83.9484, GNorm = 0.6706, lr_0 = 1.8655e-04
Loss = 9.1388e-02, PNorm = 83.9497, GNorm = 0.8230, lr_0 = 1.8643e-04
Loss = 8.2771e-02, PNorm = 83.9545, GNorm = 0.5597, lr_0 = 1.8630e-04
Loss = 8.1517e-02, PNorm = 83.9583, GNorm = 1.0162, lr_0 = 1.8617e-04
Loss = 8.0469e-02, PNorm = 83.9612, GNorm = 0.6181, lr_0 = 1.8604e-04
Loss = 8.6288e-02, PNorm = 83.9645, GNorm = 0.7939, lr_0 = 1.8592e-04
Loss = 8.0938e-02, PNorm = 83.9673, GNorm = 0.5580, lr_0 = 1.8579e-04
Loss = 8.0759e-02, PNorm = 83.9690, GNorm = 0.4790, lr_0 = 1.8566e-04
Loss = 9.9883e-02, PNorm = 83.9735, GNorm = 0.4520, lr_0 = 1.8553e-04
Loss = 8.2156e-02, PNorm = 83.9783, GNorm = 0.6529, lr_0 = 1.8541e-04
Loss = 8.9049e-02, PNorm = 83.9825, GNorm = 0.4551, lr_0 = 1.8528e-04
Loss = 8.9355e-02, PNorm = 83.9847, GNorm = 0.7310, lr_0 = 1.8515e-04
Loss = 9.3295e-02, PNorm = 83.9866, GNorm = 0.7956, lr_0 = 1.8503e-04
Loss = 9.2725e-02, PNorm = 83.9890, GNorm = 0.6691, lr_0 = 1.8490e-04
Loss = 8.4753e-02, PNorm = 83.9923, GNorm = 0.6864, lr_0 = 1.8477e-04
Loss = 9.4109e-02, PNorm = 83.9951, GNorm = 0.7343, lr_0 = 1.8465e-04
Loss = 9.9119e-02, PNorm = 83.9981, GNorm = 0.8462, lr_0 = 1.8452e-04
Loss = 9.4066e-02, PNorm = 83.9995, GNorm = 0.6691, lr_0 = 1.8439e-04
Loss = 8.3081e-02, PNorm = 84.0033, GNorm = 0.6943, lr_0 = 1.8427e-04
Loss = 8.9063e-02, PNorm = 84.0054, GNorm = 0.6551, lr_0 = 1.8414e-04
Loss = 8.7661e-02, PNorm = 84.0080, GNorm = 0.6552, lr_0 = 1.8401e-04
Loss = 9.9221e-02, PNorm = 84.0129, GNorm = 0.6512, lr_0 = 1.8389e-04
Loss = 8.5583e-02, PNorm = 84.0157, GNorm = 0.6189, lr_0 = 1.8376e-04
Loss = 8.8635e-02, PNorm = 84.0190, GNorm = 0.6811, lr_0 = 1.8364e-04
Loss = 8.2988e-02, PNorm = 84.0219, GNorm = 0.5714, lr_0 = 1.8351e-04
Loss = 8.7726e-02, PNorm = 84.0251, GNorm = 0.7524, lr_0 = 1.8338e-04
Loss = 9.7214e-02, PNorm = 84.0277, GNorm = 1.1755, lr_0 = 1.8326e-04
Loss = 8.7775e-02, PNorm = 84.0293, GNorm = 0.7308, lr_0 = 1.8313e-04
Loss = 7.4995e-02, PNorm = 84.0332, GNorm = 0.5174, lr_0 = 1.8301e-04
Loss = 7.5208e-02, PNorm = 84.0371, GNorm = 0.5371, lr_0 = 1.8288e-04
Loss = 7.9570e-02, PNorm = 84.0374, GNorm = 0.7843, lr_0 = 1.8276e-04
Loss = 8.6724e-02, PNorm = 84.0405, GNorm = 0.7315, lr_0 = 1.8263e-04
Loss = 8.9213e-02, PNorm = 84.0449, GNorm = 0.7459, lr_0 = 1.8251e-04
Loss = 8.0270e-02, PNorm = 84.0473, GNorm = 0.4557, lr_0 = 1.8238e-04
Loss = 8.1379e-02, PNorm = 84.0515, GNorm = 0.4929, lr_0 = 1.8226e-04
Loss = 8.6865e-02, PNorm = 84.0539, GNorm = 0.8733, lr_0 = 1.8213e-04
Loss = 8.9550e-02, PNorm = 84.0552, GNorm = 0.7298, lr_0 = 1.8201e-04
Loss = 8.3973e-02, PNorm = 84.0570, GNorm = 0.6239, lr_0 = 1.8188e-04
Loss = 8.8243e-02, PNorm = 84.0593, GNorm = 0.5837, lr_0 = 1.8176e-04
Loss = 8.0699e-02, PNorm = 84.0607, GNorm = 0.5901, lr_0 = 1.8163e-04
Loss = 8.0217e-02, PNorm = 84.0629, GNorm = 0.4691, lr_0 = 1.8151e-04
Loss = 8.1848e-02, PNorm = 84.0666, GNorm = 0.5452, lr_0 = 1.8138e-04
Loss = 8.8149e-02, PNorm = 84.0686, GNorm = 0.6563, lr_0 = 1.8126e-04
Loss = 1.0139e-01, PNorm = 84.0727, GNorm = 0.7042, lr_0 = 1.8114e-04
Loss = 8.1307e-02, PNorm = 84.0759, GNorm = 0.6301, lr_0 = 1.8101e-04
Loss = 9.5745e-02, PNorm = 84.0803, GNorm = 0.8468, lr_0 = 1.8089e-04
Loss = 8.5154e-02, PNorm = 84.0841, GNorm = 0.9015, lr_0 = 1.8076e-04
Loss = 8.8507e-02, PNorm = 84.0880, GNorm = 0.6337, lr_0 = 1.8064e-04
Loss = 9.2884e-02, PNorm = 84.0909, GNorm = 0.6349, lr_0 = 1.8052e-04
Loss = 8.0059e-02, PNorm = 84.0932, GNorm = 0.6113, lr_0 = 1.8039e-04
Loss = 8.5100e-02, PNorm = 84.0945, GNorm = 0.6448, lr_0 = 1.8027e-04
Loss = 8.7299e-02, PNorm = 84.0964, GNorm = 0.5899, lr_0 = 1.8015e-04
Loss = 8.8330e-02, PNorm = 84.0992, GNorm = 0.8436, lr_0 = 1.8002e-04
Loss = 1.0692e-01, PNorm = 84.1015, GNorm = 0.6202, lr_0 = 1.7990e-04
Loss = 8.7072e-02, PNorm = 84.1056, GNorm = 0.6161, lr_0 = 1.7978e-04
Loss = 1.0058e-01, PNorm = 84.1078, GNorm = 1.0646, lr_0 = 1.7965e-04
Loss = 8.5754e-02, PNorm = 84.1116, GNorm = 0.6480, lr_0 = 1.7953e-04
Loss = 9.3937e-02, PNorm = 84.1165, GNorm = 0.5518, lr_0 = 1.7941e-04
Loss = 7.9665e-02, PNorm = 84.1203, GNorm = 0.4862, lr_0 = 1.7928e-04
Loss = 9.1270e-02, PNorm = 84.1206, GNorm = 0.6462, lr_0 = 1.7916e-04
Loss = 9.9372e-02, PNorm = 84.1232, GNorm = 0.5361, lr_0 = 1.7904e-04
Loss = 8.1167e-02, PNorm = 84.1259, GNorm = 0.5923, lr_0 = 1.7892e-04
Loss = 8.7595e-02, PNorm = 84.1260, GNorm = 0.6465, lr_0 = 1.7879e-04
Loss = 7.9985e-02, PNorm = 84.1268, GNorm = 0.7492, lr_0 = 1.7867e-04
Loss = 8.2136e-02, PNorm = 84.1284, GNorm = 0.4568, lr_0 = 1.7855e-04
Loss = 8.4926e-02, PNorm = 84.1337, GNorm = 0.5565, lr_0 = 1.7843e-04
Loss = 9.2987e-02, PNorm = 84.1366, GNorm = 0.7866, lr_0 = 1.7830e-04
Loss = 8.7693e-02, PNorm = 84.1409, GNorm = 0.7334, lr_0 = 1.7818e-04
Loss = 9.2942e-02, PNorm = 84.1445, GNorm = 0.6969, lr_0 = 1.7806e-04
Loss = 8.0935e-02, PNorm = 84.1481, GNorm = 0.7094, lr_0 = 1.7794e-04
Loss = 9.8066e-02, PNorm = 84.1516, GNorm = 0.7807, lr_0 = 1.7782e-04
Validation mae = 0.229828
Epoch 23
Loss = 7.7669e-02, PNorm = 84.1550, GNorm = 0.5893, lr_0 = 1.7769e-04
Loss = 8.0297e-02, PNorm = 84.1602, GNorm = 0.5431, lr_0 = 1.7757e-04
Loss = 8.8216e-02, PNorm = 84.1631, GNorm = 0.5149, lr_0 = 1.7745e-04
Loss = 8.3938e-02, PNorm = 84.1676, GNorm = 0.7515, lr_0 = 1.7733e-04
Loss = 8.1213e-02, PNorm = 84.1698, GNorm = 0.7963, lr_0 = 1.7721e-04
Loss = 7.7892e-02, PNorm = 84.1730, GNorm = 0.6051, lr_0 = 1.7709e-04
Loss = 8.9923e-02, PNorm = 84.1782, GNorm = 0.7405, lr_0 = 1.7696e-04
Loss = 7.9840e-02, PNorm = 84.1793, GNorm = 0.6067, lr_0 = 1.7684e-04
Loss = 9.0091e-02, PNorm = 84.1809, GNorm = 0.4772, lr_0 = 1.7672e-04
Loss = 8.2245e-02, PNorm = 84.1828, GNorm = 0.7705, lr_0 = 1.7660e-04
Loss = 8.1807e-02, PNorm = 84.1838, GNorm = 0.5272, lr_0 = 1.7648e-04
Loss = 8.1026e-02, PNorm = 84.1856, GNorm = 0.5521, lr_0 = 1.7636e-04
Loss = 8.6116e-02, PNorm = 84.1903, GNorm = 0.5173, lr_0 = 1.7624e-04
Loss = 8.9408e-02, PNorm = 84.1954, GNorm = 0.6699, lr_0 = 1.7612e-04
Loss = 7.9963e-02, PNorm = 84.1974, GNorm = 0.5856, lr_0 = 1.7600e-04
Loss = 6.9908e-02, PNorm = 84.2015, GNorm = 0.5369, lr_0 = 1.7588e-04
Loss = 8.2798e-02, PNorm = 84.2055, GNorm = 0.8640, lr_0 = 1.7576e-04
Loss = 8.3628e-02, PNorm = 84.2070, GNorm = 0.5323, lr_0 = 1.7564e-04
Loss = 8.3003e-02, PNorm = 84.2073, GNorm = 0.5170, lr_0 = 1.7552e-04
Loss = 9.3145e-02, PNorm = 84.2102, GNorm = 0.4893, lr_0 = 1.7540e-04
Loss = 7.6778e-02, PNorm = 84.2133, GNorm = 0.6610, lr_0 = 1.7528e-04
Loss = 8.6311e-02, PNorm = 84.2170, GNorm = 0.6339, lr_0 = 1.7516e-04
Loss = 7.9501e-02, PNorm = 84.2194, GNorm = 0.7159, lr_0 = 1.7504e-04
Loss = 9.1399e-02, PNorm = 84.2215, GNorm = 0.8305, lr_0 = 1.7492e-04
Loss = 8.2676e-02, PNorm = 84.2256, GNorm = 0.7803, lr_0 = 1.7480e-04
Loss = 7.4813e-02, PNorm = 84.2288, GNorm = 0.5569, lr_0 = 1.7468e-04
Loss = 9.0203e-02, PNorm = 84.2353, GNorm = 0.7783, lr_0 = 1.7456e-04
Loss = 8.2906e-02, PNorm = 84.2392, GNorm = 0.7862, lr_0 = 1.7444e-04
Loss = 8.6540e-02, PNorm = 84.2401, GNorm = 0.6313, lr_0 = 1.7432e-04
Loss = 9.1151e-02, PNorm = 84.2414, GNorm = 0.6533, lr_0 = 1.7420e-04
Loss = 7.5185e-02, PNorm = 84.2432, GNorm = 0.5342, lr_0 = 1.7408e-04
Loss = 8.7561e-02, PNorm = 84.2418, GNorm = 0.5383, lr_0 = 1.7396e-04
Loss = 8.4726e-02, PNorm = 84.2443, GNorm = 0.6532, lr_0 = 1.7384e-04
Loss = 7.4374e-02, PNorm = 84.2479, GNorm = 0.5245, lr_0 = 1.7372e-04
Loss = 6.4348e-02, PNorm = 84.2514, GNorm = 0.6387, lr_0 = 1.7360e-04
Loss = 9.4169e-02, PNorm = 84.2540, GNorm = 0.8561, lr_0 = 1.7348e-04
Loss = 8.5983e-02, PNorm = 84.2550, GNorm = 0.5651, lr_0 = 1.7336e-04
Loss = 8.4888e-02, PNorm = 84.2585, GNorm = 0.6254, lr_0 = 1.7325e-04
Loss = 9.1908e-02, PNorm = 84.2640, GNorm = 0.6610, lr_0 = 1.7313e-04
Loss = 8.2959e-02, PNorm = 84.2670, GNorm = 0.6835, lr_0 = 1.7301e-04
Loss = 8.2565e-02, PNorm = 84.2697, GNorm = 0.5590, lr_0 = 1.7289e-04
Loss = 7.5898e-02, PNorm = 84.2742, GNorm = 0.5670, lr_0 = 1.7277e-04
Loss = 7.9890e-02, PNorm = 84.2755, GNorm = 0.5857, lr_0 = 1.7265e-04
Loss = 8.1260e-02, PNorm = 84.2769, GNorm = 0.7138, lr_0 = 1.7253e-04
Loss = 8.7101e-02, PNorm = 84.2813, GNorm = 0.7202, lr_0 = 1.7242e-04
Loss = 8.0841e-02, PNorm = 84.2806, GNorm = 0.7882, lr_0 = 1.7230e-04
Loss = 7.7480e-02, PNorm = 84.2834, GNorm = 0.4468, lr_0 = 1.7218e-04
Loss = 7.8394e-02, PNorm = 84.2887, GNorm = 0.4128, lr_0 = 1.7206e-04
Loss = 9.2749e-02, PNorm = 84.2934, GNorm = 0.5772, lr_0 = 1.7194e-04
Loss = 9.2174e-02, PNorm = 84.2992, GNorm = 0.7181, lr_0 = 1.7183e-04
Loss = 7.7026e-02, PNorm = 84.3019, GNorm = 0.8380, lr_0 = 1.7171e-04
Loss = 9.8188e-02, PNorm = 84.3041, GNorm = 0.7586, lr_0 = 1.7159e-04
Loss = 9.1629e-02, PNorm = 84.3079, GNorm = 0.5957, lr_0 = 1.7147e-04
Loss = 7.4005e-02, PNorm = 84.3105, GNorm = 0.4608, lr_0 = 1.7136e-04
Loss = 7.9931e-02, PNorm = 84.3147, GNorm = 0.5623, lr_0 = 1.7124e-04
Loss = 8.4318e-02, PNorm = 84.3174, GNorm = 0.4783, lr_0 = 1.7112e-04
Loss = 8.6618e-02, PNorm = 84.3198, GNorm = 0.5779, lr_0 = 1.7100e-04
Loss = 8.1434e-02, PNorm = 84.3247, GNorm = 0.6722, lr_0 = 1.7089e-04
Loss = 8.6531e-02, PNorm = 84.3283, GNorm = 0.6947, lr_0 = 1.7077e-04
Loss = 8.5907e-02, PNorm = 84.3329, GNorm = 0.5801, lr_0 = 1.7065e-04
Loss = 8.5739e-02, PNorm = 84.3372, GNorm = 0.8903, lr_0 = 1.7054e-04
Loss = 9.4222e-02, PNorm = 84.3400, GNorm = 0.4598, lr_0 = 1.7042e-04
Loss = 9.0777e-02, PNorm = 84.3443, GNorm = 0.9375, lr_0 = 1.7030e-04
Loss = 9.1024e-02, PNorm = 84.3469, GNorm = 0.5693, lr_0 = 1.7019e-04
Loss = 8.1877e-02, PNorm = 84.3494, GNorm = 0.7760, lr_0 = 1.7007e-04
Loss = 9.2644e-02, PNorm = 84.3523, GNorm = 0.7041, lr_0 = 1.6995e-04
Loss = 7.7584e-02, PNorm = 84.3543, GNorm = 0.5693, lr_0 = 1.6984e-04
Loss = 9.0027e-02, PNorm = 84.3560, GNorm = 0.5393, lr_0 = 1.6972e-04
Loss = 8.0239e-02, PNorm = 84.3597, GNorm = 0.5976, lr_0 = 1.6960e-04
Loss = 8.2126e-02, PNorm = 84.3648, GNorm = 0.7042, lr_0 = 1.6949e-04
Loss = 7.5432e-02, PNorm = 84.3688, GNorm = 0.8221, lr_0 = 1.6937e-04
Loss = 1.0916e-01, PNorm = 84.3746, GNorm = 0.7736, lr_0 = 1.6926e-04
Loss = 9.2907e-02, PNorm = 84.3792, GNorm = 0.6366, lr_0 = 1.6914e-04
Loss = 8.7616e-02, PNorm = 84.3821, GNorm = 0.4815, lr_0 = 1.6902e-04
Loss = 7.8272e-02, PNorm = 84.3856, GNorm = 0.7693, lr_0 = 1.6891e-04
Loss = 8.4177e-02, PNorm = 84.3896, GNorm = 0.5848, lr_0 = 1.6879e-04
Loss = 8.4386e-02, PNorm = 84.3938, GNorm = 0.7452, lr_0 = 1.6868e-04
Loss = 7.0165e-02, PNorm = 84.3958, GNorm = 0.6407, lr_0 = 1.6856e-04
Loss = 8.6012e-02, PNorm = 84.3982, GNorm = 0.6455, lr_0 = 1.6845e-04
Loss = 8.3879e-02, PNorm = 84.4019, GNorm = 0.6149, lr_0 = 1.6833e-04
Loss = 7.5566e-02, PNorm = 84.4046, GNorm = 0.6863, lr_0 = 1.6821e-04
Loss = 8.3469e-02, PNorm = 84.4058, GNorm = 0.6520, lr_0 = 1.6810e-04
Loss = 8.3066e-02, PNorm = 84.4066, GNorm = 0.6262, lr_0 = 1.6798e-04
Loss = 9.1232e-02, PNorm = 84.4086, GNorm = 0.5564, lr_0 = 1.6787e-04
Loss = 8.4882e-02, PNorm = 84.4092, GNorm = 0.7982, lr_0 = 1.6775e-04
Loss = 1.0282e-01, PNorm = 84.4105, GNorm = 0.7241, lr_0 = 1.6764e-04
Loss = 8.7216e-02, PNorm = 84.4166, GNorm = 0.5432, lr_0 = 1.6752e-04
Loss = 8.3500e-02, PNorm = 84.4231, GNorm = 0.7492, lr_0 = 1.6741e-04
Loss = 9.3323e-02, PNorm = 84.4259, GNorm = 0.7505, lr_0 = 1.6729e-04
Loss = 8.6882e-02, PNorm = 84.4279, GNorm = 0.5309, lr_0 = 1.6718e-04
Loss = 8.6938e-02, PNorm = 84.4302, GNorm = 0.6322, lr_0 = 1.6707e-04
Loss = 8.2423e-02, PNorm = 84.4315, GNorm = 0.6367, lr_0 = 1.6695e-04
Loss = 8.3495e-02, PNorm = 84.4318, GNorm = 0.5738, lr_0 = 1.6684e-04
Loss = 9.1839e-02, PNorm = 84.4330, GNorm = 0.5878, lr_0 = 1.6672e-04
Loss = 8.5863e-02, PNorm = 84.4356, GNorm = 0.6781, lr_0 = 1.6661e-04
Loss = 8.2152e-02, PNorm = 84.4382, GNorm = 0.5276, lr_0 = 1.6649e-04
Loss = 8.9617e-02, PNorm = 84.4409, GNorm = 0.6635, lr_0 = 1.6638e-04
Loss = 8.0546e-02, PNorm = 84.4431, GNorm = 0.5481, lr_0 = 1.6627e-04
Loss = 9.9034e-02, PNorm = 84.4461, GNorm = 0.9720, lr_0 = 1.6615e-04
Loss = 9.4297e-02, PNorm = 84.4488, GNorm = 0.6158, lr_0 = 1.6604e-04
Loss = 8.6985e-02, PNorm = 84.4526, GNorm = 0.7490, lr_0 = 1.6592e-04
Loss = 8.6788e-02, PNorm = 84.4556, GNorm = 0.6536, lr_0 = 1.6581e-04
Loss = 8.4394e-02, PNorm = 84.4573, GNorm = 0.8850, lr_0 = 1.6570e-04
Loss = 9.8859e-02, PNorm = 84.4593, GNorm = 0.8906, lr_0 = 1.6558e-04
Loss = 8.6196e-02, PNorm = 84.4613, GNorm = 0.6841, lr_0 = 1.6547e-04
Loss = 9.3898e-02, PNorm = 84.4635, GNorm = 0.8746, lr_0 = 1.6536e-04
Loss = 7.4471e-02, PNorm = 84.4662, GNorm = 0.4337, lr_0 = 1.6524e-04
Loss = 9.1799e-02, PNorm = 84.4704, GNorm = 0.6793, lr_0 = 1.6513e-04
Loss = 9.2719e-02, PNorm = 84.4747, GNorm = 0.5877, lr_0 = 1.6502e-04
Loss = 8.8376e-02, PNorm = 84.4784, GNorm = 0.7986, lr_0 = 1.6490e-04
Loss = 9.1357e-02, PNorm = 84.4802, GNorm = 0.7226, lr_0 = 1.6479e-04
Loss = 7.8903e-02, PNorm = 84.4834, GNorm = 0.5867, lr_0 = 1.6468e-04
Loss = 8.7471e-02, PNorm = 84.4868, GNorm = 0.9275, lr_0 = 1.6457e-04
Loss = 7.7510e-02, PNorm = 84.4873, GNorm = 0.8231, lr_0 = 1.6445e-04
Loss = 7.9895e-02, PNorm = 84.4912, GNorm = 0.7371, lr_0 = 1.6434e-04
Loss = 9.1846e-02, PNorm = 84.4947, GNorm = 0.8462, lr_0 = 1.6423e-04
Loss = 8.7429e-02, PNorm = 84.4968, GNorm = 0.7487, lr_0 = 1.6412e-04
Loss = 8.8090e-02, PNorm = 84.4985, GNorm = 0.5947, lr_0 = 1.6400e-04
Loss = 8.6911e-02, PNorm = 84.5024, GNorm = 0.5381, lr_0 = 1.6389e-04
Loss = 8.6389e-02, PNorm = 84.5050, GNorm = 0.6471, lr_0 = 1.6378e-04
Validation mae = 0.231273
Epoch 24
Loss = 8.7718e-02, PNorm = 84.5062, GNorm = 0.7276, lr_0 = 1.6367e-04
Loss = 8.4510e-02, PNorm = 84.5095, GNorm = 0.5453, lr_0 = 1.6355e-04
Loss = 7.7673e-02, PNorm = 84.5109, GNorm = 0.8030, lr_0 = 1.6344e-04
Loss = 7.9785e-02, PNorm = 84.5154, GNorm = 0.9460, lr_0 = 1.6333e-04
Loss = 8.4045e-02, PNorm = 84.5168, GNorm = 0.6043, lr_0 = 1.6322e-04
Loss = 7.3690e-02, PNorm = 84.5194, GNorm = 0.4248, lr_0 = 1.6311e-04
Loss = 8.0223e-02, PNorm = 84.5231, GNorm = 0.6568, lr_0 = 1.6299e-04
Loss = 8.3405e-02, PNorm = 84.5245, GNorm = 0.7230, lr_0 = 1.6288e-04
Loss = 8.2185e-02, PNorm = 84.5259, GNorm = 0.8366, lr_0 = 1.6277e-04
Loss = 8.3007e-02, PNorm = 84.5277, GNorm = 0.6614, lr_0 = 1.6266e-04
Loss = 6.9097e-02, PNorm = 84.5293, GNorm = 0.5555, lr_0 = 1.6255e-04
Loss = 8.5191e-02, PNorm = 84.5326, GNorm = 0.6592, lr_0 = 1.6244e-04
Loss = 9.1631e-02, PNorm = 84.5369, GNorm = 0.6096, lr_0 = 1.6233e-04
Loss = 8.1211e-02, PNorm = 84.5401, GNorm = 0.5998, lr_0 = 1.6221e-04
Loss = 8.2114e-02, PNorm = 84.5433, GNorm = 0.5830, lr_0 = 1.6210e-04
Loss = 7.9947e-02, PNorm = 84.5464, GNorm = 0.8518, lr_0 = 1.6199e-04
Loss = 9.5715e-02, PNorm = 84.5483, GNorm = 0.6654, lr_0 = 1.6188e-04
Loss = 8.4400e-02, PNorm = 84.5519, GNorm = 0.5041, lr_0 = 1.6177e-04
Loss = 8.4082e-02, PNorm = 84.5555, GNorm = 0.6490, lr_0 = 1.6166e-04
Loss = 9.0177e-02, PNorm = 84.5559, GNorm = 0.5553, lr_0 = 1.6155e-04
Loss = 7.7718e-02, PNorm = 84.5561, GNorm = 0.7402, lr_0 = 1.6144e-04
Loss = 8.0443e-02, PNorm = 84.5580, GNorm = 0.6811, lr_0 = 1.6133e-04
Loss = 8.4119e-02, PNorm = 84.5599, GNorm = 1.1332, lr_0 = 1.6122e-04
Loss = 8.6214e-02, PNorm = 84.5641, GNorm = 0.6844, lr_0 = 1.6111e-04
Loss = 8.1446e-02, PNorm = 84.5678, GNorm = 0.8215, lr_0 = 1.6100e-04
Loss = 7.8997e-02, PNorm = 84.5696, GNorm = 0.5385, lr_0 = 1.6089e-04
Loss = 7.9879e-02, PNorm = 84.5710, GNorm = 0.7365, lr_0 = 1.6078e-04
Loss = 8.6821e-02, PNorm = 84.5720, GNorm = 0.5842, lr_0 = 1.6067e-04
Loss = 7.5979e-02, PNorm = 84.5736, GNorm = 0.6459, lr_0 = 1.6056e-04
Loss = 8.4986e-02, PNorm = 84.5758, GNorm = 0.6495, lr_0 = 1.6045e-04
Loss = 8.3562e-02, PNorm = 84.5772, GNorm = 0.6084, lr_0 = 1.6034e-04
Loss = 8.2987e-02, PNorm = 84.5799, GNorm = 0.6895, lr_0 = 1.6023e-04
Loss = 8.3695e-02, PNorm = 84.5838, GNorm = 0.8201, lr_0 = 1.6012e-04
Loss = 7.0985e-02, PNorm = 84.5853, GNorm = 0.7382, lr_0 = 1.6001e-04
Loss = 7.5911e-02, PNorm = 84.5867, GNorm = 0.7302, lr_0 = 1.5990e-04
Loss = 8.3602e-02, PNorm = 84.5900, GNorm = 0.5335, lr_0 = 1.5979e-04
Loss = 8.8352e-02, PNorm = 84.5924, GNorm = 0.4902, lr_0 = 1.5968e-04
Loss = 8.1555e-02, PNorm = 84.5960, GNorm = 0.5300, lr_0 = 1.5957e-04
Loss = 8.4276e-02, PNorm = 84.6009, GNorm = 0.6691, lr_0 = 1.5946e-04
Loss = 8.0771e-02, PNorm = 84.6036, GNorm = 0.5893, lr_0 = 1.5935e-04
Loss = 7.9342e-02, PNorm = 84.6054, GNorm = 0.7001, lr_0 = 1.5924e-04
Loss = 7.7033e-02, PNorm = 84.6083, GNorm = 0.5807, lr_0 = 1.5913e-04
Loss = 8.5024e-02, PNorm = 84.6110, GNorm = 0.8891, lr_0 = 1.5902e-04
Loss = 8.1533e-02, PNorm = 84.6137, GNorm = 0.5825, lr_0 = 1.5891e-04
Loss = 9.3566e-02, PNorm = 84.6167, GNorm = 0.5956, lr_0 = 1.5880e-04
Loss = 7.5937e-02, PNorm = 84.6207, GNorm = 0.4931, lr_0 = 1.5870e-04
Loss = 8.5655e-02, PNorm = 84.6243, GNorm = 0.7499, lr_0 = 1.5859e-04
Loss = 7.8039e-02, PNorm = 84.6264, GNorm = 0.7401, lr_0 = 1.5848e-04
Loss = 7.4928e-02, PNorm = 84.6294, GNorm = 0.5702, lr_0 = 1.5837e-04
Loss = 7.5778e-02, PNorm = 84.6312, GNorm = 0.6047, lr_0 = 1.5826e-04
Loss = 7.4594e-02, PNorm = 84.6330, GNorm = 0.5713, lr_0 = 1.5815e-04
Loss = 8.0188e-02, PNorm = 84.6367, GNorm = 0.5485, lr_0 = 1.5804e-04
Loss = 7.7660e-02, PNorm = 84.6392, GNorm = 0.6396, lr_0 = 1.5794e-04
Loss = 8.8525e-02, PNorm = 84.6400, GNorm = 1.2045, lr_0 = 1.5783e-04
Loss = 8.9333e-02, PNorm = 84.6416, GNorm = 0.6337, lr_0 = 1.5772e-04
Loss = 8.2872e-02, PNorm = 84.6435, GNorm = 0.6732, lr_0 = 1.5761e-04
Loss = 9.0948e-02, PNorm = 84.6474, GNorm = 0.6753, lr_0 = 1.5750e-04
Loss = 8.3141e-02, PNorm = 84.6493, GNorm = 0.7232, lr_0 = 1.5740e-04
Loss = 7.6107e-02, PNorm = 84.6504, GNorm = 0.8874, lr_0 = 1.5729e-04
Loss = 8.5220e-02, PNorm = 84.6524, GNorm = 0.5739, lr_0 = 1.5718e-04
Loss = 7.9549e-02, PNorm = 84.6542, GNorm = 0.5612, lr_0 = 1.5707e-04
Loss = 8.4550e-02, PNorm = 84.6540, GNorm = 0.7606, lr_0 = 1.5697e-04
Loss = 8.0423e-02, PNorm = 84.6534, GNorm = 0.4665, lr_0 = 1.5686e-04
Loss = 8.3605e-02, PNorm = 84.6539, GNorm = 0.7203, lr_0 = 1.5675e-04
Loss = 7.2217e-02, PNorm = 84.6539, GNorm = 1.0092, lr_0 = 1.5664e-04
Loss = 7.6677e-02, PNorm = 84.6552, GNorm = 0.4789, lr_0 = 1.5654e-04
Loss = 8.8594e-02, PNorm = 84.6584, GNorm = 0.5703, lr_0 = 1.5643e-04
Loss = 9.2730e-02, PNorm = 84.6608, GNorm = 0.5229, lr_0 = 1.5632e-04
Loss = 9.3392e-02, PNorm = 84.6642, GNorm = 1.1191, lr_0 = 1.5621e-04
Loss = 7.8824e-02, PNorm = 84.6682, GNorm = 0.6036, lr_0 = 1.5611e-04
Loss = 9.1548e-02, PNorm = 84.6703, GNorm = 0.6258, lr_0 = 1.5600e-04
Loss = 8.9639e-02, PNorm = 84.6732, GNorm = 0.8038, lr_0 = 1.5589e-04
Loss = 8.3832e-02, PNorm = 84.6764, GNorm = 0.5382, lr_0 = 1.5579e-04
Loss = 8.8350e-02, PNorm = 84.6800, GNorm = 0.5656, lr_0 = 1.5568e-04
Loss = 8.5734e-02, PNorm = 84.6806, GNorm = 0.5476, lr_0 = 1.5557e-04
Loss = 9.2870e-02, PNorm = 84.6812, GNorm = 0.8222, lr_0 = 1.5547e-04
Loss = 8.0670e-02, PNorm = 84.6837, GNorm = 0.5881, lr_0 = 1.5536e-04
Loss = 7.3360e-02, PNorm = 84.6870, GNorm = 0.7115, lr_0 = 1.5525e-04
Loss = 9.0550e-02, PNorm = 84.6906, GNorm = 0.5326, lr_0 = 1.5515e-04
Loss = 8.7486e-02, PNorm = 84.6908, GNorm = 0.6047, lr_0 = 1.5504e-04
Loss = 8.3302e-02, PNorm = 84.6930, GNorm = 0.9992, lr_0 = 1.5493e-04
Loss = 8.9467e-02, PNorm = 84.6973, GNorm = 0.6141, lr_0 = 1.5483e-04
Loss = 7.3958e-02, PNorm = 84.6983, GNorm = 0.5099, lr_0 = 1.5472e-04
Loss = 8.3291e-02, PNorm = 84.7003, GNorm = 0.6058, lr_0 = 1.5462e-04
Loss = 8.0314e-02, PNorm = 84.7031, GNorm = 0.6724, lr_0 = 1.5451e-04
Loss = 8.1696e-02, PNorm = 84.7047, GNorm = 0.6425, lr_0 = 1.5440e-04
Loss = 8.2370e-02, PNorm = 84.7074, GNorm = 0.6068, lr_0 = 1.5430e-04
Loss = 9.2330e-02, PNorm = 84.7124, GNorm = 0.6254, lr_0 = 1.5419e-04
Loss = 7.6255e-02, PNorm = 84.7176, GNorm = 0.7200, lr_0 = 1.5409e-04
Loss = 7.5638e-02, PNorm = 84.7195, GNorm = 0.5284, lr_0 = 1.5398e-04
Loss = 8.7966e-02, PNorm = 84.7218, GNorm = 0.6342, lr_0 = 1.5388e-04
Loss = 8.5298e-02, PNorm = 84.7250, GNorm = 0.5584, lr_0 = 1.5377e-04
Loss = 9.0075e-02, PNorm = 84.7302, GNorm = 0.7577, lr_0 = 1.5367e-04
Loss = 8.1671e-02, PNorm = 84.7327, GNorm = 0.6975, lr_0 = 1.5356e-04
Loss = 9.3048e-02, PNorm = 84.7336, GNorm = 0.5424, lr_0 = 1.5346e-04
Loss = 8.1047e-02, PNorm = 84.7367, GNorm = 0.4796, lr_0 = 1.5335e-04
Loss = 9.1276e-02, PNorm = 84.7383, GNorm = 0.6382, lr_0 = 1.5325e-04
Loss = 7.6476e-02, PNorm = 84.7415, GNorm = 0.6498, lr_0 = 1.5314e-04
Loss = 9.0699e-02, PNorm = 84.7456, GNorm = 0.7202, lr_0 = 1.5304e-04
Loss = 7.0783e-02, PNorm = 84.7465, GNorm = 0.4015, lr_0 = 1.5293e-04
Loss = 8.5323e-02, PNorm = 84.7473, GNorm = 0.6123, lr_0 = 1.5283e-04
Loss = 8.9493e-02, PNorm = 84.7483, GNorm = 0.5613, lr_0 = 1.5272e-04
Loss = 8.1149e-02, PNorm = 84.7496, GNorm = 0.7381, lr_0 = 1.5262e-04
Loss = 9.0801e-02, PNorm = 84.7505, GNorm = 0.7399, lr_0 = 1.5251e-04
Loss = 8.1641e-02, PNorm = 84.7518, GNorm = 0.7213, lr_0 = 1.5241e-04
Loss = 8.1862e-02, PNorm = 84.7563, GNorm = 0.7146, lr_0 = 1.5230e-04
Loss = 7.2038e-02, PNorm = 84.7601, GNorm = 0.8223, lr_0 = 1.5220e-04
Loss = 8.2569e-02, PNorm = 84.7615, GNorm = 0.8432, lr_0 = 1.5209e-04
Loss = 8.8123e-02, PNorm = 84.7614, GNorm = 0.5264, lr_0 = 1.5199e-04
Loss = 7.9791e-02, PNorm = 84.7655, GNorm = 0.4999, lr_0 = 1.5189e-04
Loss = 7.7504e-02, PNorm = 84.7680, GNorm = 0.4932, lr_0 = 1.5178e-04
Loss = 8.3602e-02, PNorm = 84.7685, GNorm = 0.6025, lr_0 = 1.5168e-04
Loss = 8.1972e-02, PNorm = 84.7708, GNorm = 0.5571, lr_0 = 1.5157e-04
Loss = 8.0614e-02, PNorm = 84.7715, GNorm = 0.5915, lr_0 = 1.5147e-04
Loss = 8.2420e-02, PNorm = 84.7722, GNorm = 0.5162, lr_0 = 1.5137e-04
Loss = 8.3822e-02, PNorm = 84.7737, GNorm = 0.7080, lr_0 = 1.5126e-04
Loss = 9.4789e-02, PNorm = 84.7754, GNorm = 0.7100, lr_0 = 1.5116e-04
Loss = 9.3583e-02, PNorm = 84.7778, GNorm = 0.7444, lr_0 = 1.5106e-04
Loss = 9.1401e-02, PNorm = 84.7807, GNorm = 0.6577, lr_0 = 1.5095e-04
Loss = 8.0993e-02, PNorm = 84.7836, GNorm = 0.8044, lr_0 = 1.5085e-04
Validation mae = 0.226788
Epoch 25
Loss = 9.0637e-02, PNorm = 84.7871, GNorm = 0.5883, lr_0 = 1.5075e-04
Loss = 7.9730e-02, PNorm = 84.7896, GNorm = 0.5286, lr_0 = 1.5064e-04
Loss = 7.3384e-02, PNorm = 84.7907, GNorm = 0.5844, lr_0 = 1.5054e-04
Loss = 6.2989e-02, PNorm = 84.7933, GNorm = 0.5667, lr_0 = 1.5044e-04
Loss = 8.1285e-02, PNorm = 84.7965, GNorm = 0.8602, lr_0 = 1.5033e-04
Loss = 6.9562e-02, PNorm = 84.7989, GNorm = 0.5153, lr_0 = 1.5023e-04
Loss = 6.6239e-02, PNorm = 84.8012, GNorm = 0.5994, lr_0 = 1.5013e-04
Loss = 8.1320e-02, PNorm = 84.8047, GNorm = 0.9099, lr_0 = 1.5002e-04
Loss = 8.0171e-02, PNorm = 84.8053, GNorm = 0.6413, lr_0 = 1.4992e-04
Loss = 8.3298e-02, PNorm = 84.8075, GNorm = 0.4989, lr_0 = 1.4982e-04
Loss = 6.7698e-02, PNorm = 84.8103, GNorm = 0.5038, lr_0 = 1.4972e-04
Loss = 8.8820e-02, PNorm = 84.8112, GNorm = 0.6900, lr_0 = 1.4961e-04
Loss = 7.4521e-02, PNorm = 84.8124, GNorm = 0.5007, lr_0 = 1.4951e-04
Loss = 8.9758e-02, PNorm = 84.8155, GNorm = 0.6285, lr_0 = 1.4941e-04
Loss = 7.9384e-02, PNorm = 84.8188, GNorm = 0.6265, lr_0 = 1.4931e-04
Loss = 7.9417e-02, PNorm = 84.8220, GNorm = 0.4446, lr_0 = 1.4920e-04
Loss = 6.9003e-02, PNorm = 84.8240, GNorm = 0.5138, lr_0 = 1.4910e-04
Loss = 8.4153e-02, PNorm = 84.8263, GNorm = 0.5514, lr_0 = 1.4900e-04
Loss = 8.0409e-02, PNorm = 84.8282, GNorm = 0.5631, lr_0 = 1.4890e-04
Loss = 8.0914e-02, PNorm = 84.8295, GNorm = 0.7386, lr_0 = 1.4880e-04
Loss = 8.2639e-02, PNorm = 84.8317, GNorm = 0.6626, lr_0 = 1.4869e-04
Loss = 7.5451e-02, PNorm = 84.8341, GNorm = 0.8269, lr_0 = 1.4859e-04
Loss = 8.9652e-02, PNorm = 84.8356, GNorm = 0.6178, lr_0 = 1.4849e-04
Loss = 7.4806e-02, PNorm = 84.8380, GNorm = 0.6812, lr_0 = 1.4839e-04
Loss = 8.0220e-02, PNorm = 84.8405, GNorm = 0.6241, lr_0 = 1.4829e-04
Loss = 7.3785e-02, PNorm = 84.8431, GNorm = 0.4492, lr_0 = 1.4818e-04
Loss = 8.1361e-02, PNorm = 84.8464, GNorm = 0.7108, lr_0 = 1.4808e-04
Loss = 7.4677e-02, PNorm = 84.8482, GNorm = 0.5690, lr_0 = 1.4798e-04
Loss = 1.0033e-01, PNorm = 84.8482, GNorm = 0.7372, lr_0 = 1.4788e-04
Loss = 7.2258e-02, PNorm = 84.8491, GNorm = 0.6790, lr_0 = 1.4778e-04
Loss = 8.7268e-02, PNorm = 84.8525, GNorm = 0.5585, lr_0 = 1.4768e-04
Loss = 7.9506e-02, PNorm = 84.8573, GNorm = 0.7227, lr_0 = 1.4758e-04
Loss = 7.8439e-02, PNorm = 84.8595, GNorm = 0.5166, lr_0 = 1.4748e-04
Loss = 7.5208e-02, PNorm = 84.8594, GNorm = 0.8098, lr_0 = 1.4737e-04
Loss = 6.9321e-02, PNorm = 84.8612, GNorm = 0.6889, lr_0 = 1.4727e-04
Loss = 7.6598e-02, PNorm = 84.8641, GNorm = 0.7195, lr_0 = 1.4717e-04
Loss = 7.7449e-02, PNorm = 84.8665, GNorm = 0.6795, lr_0 = 1.4707e-04
Loss = 8.3180e-02, PNorm = 84.8684, GNorm = 0.5191, lr_0 = 1.4697e-04
Loss = 7.3742e-02, PNorm = 84.8706, GNorm = 0.6143, lr_0 = 1.4687e-04
Loss = 7.4722e-02, PNorm = 84.8736, GNorm = 0.8320, lr_0 = 1.4677e-04
Loss = 8.3716e-02, PNorm = 84.8784, GNorm = 0.6582, lr_0 = 1.4667e-04
Loss = 7.7931e-02, PNorm = 84.8796, GNorm = 0.5717, lr_0 = 1.4657e-04
Loss = 8.6056e-02, PNorm = 84.8813, GNorm = 0.5628, lr_0 = 1.4647e-04
Loss = 8.5318e-02, PNorm = 84.8851, GNorm = 0.6354, lr_0 = 1.4637e-04
Loss = 8.4661e-02, PNorm = 84.8873, GNorm = 0.9952, lr_0 = 1.4627e-04
Loss = 7.7739e-02, PNorm = 84.8903, GNorm = 0.5659, lr_0 = 1.4617e-04
Loss = 7.4991e-02, PNorm = 84.8939, GNorm = 0.4733, lr_0 = 1.4607e-04
Loss = 8.3850e-02, PNorm = 84.8954, GNorm = 0.7591, lr_0 = 1.4597e-04
Loss = 7.1820e-02, PNorm = 84.8988, GNorm = 0.6944, lr_0 = 1.4587e-04
Loss = 8.7103e-02, PNorm = 84.9013, GNorm = 0.6618, lr_0 = 1.4577e-04
Loss = 7.1762e-02, PNorm = 84.9054, GNorm = 0.6287, lr_0 = 1.4567e-04
Loss = 7.7810e-02, PNorm = 84.9076, GNorm = 0.6092, lr_0 = 1.4557e-04
Loss = 7.8370e-02, PNorm = 84.9097, GNorm = 0.5554, lr_0 = 1.4547e-04
Loss = 7.0209e-02, PNorm = 84.9123, GNorm = 0.6565, lr_0 = 1.4537e-04
Loss = 7.4161e-02, PNorm = 84.9129, GNorm = 0.5971, lr_0 = 1.4527e-04
Loss = 8.2621e-02, PNorm = 84.9128, GNorm = 0.7022, lr_0 = 1.4517e-04
Loss = 8.9001e-02, PNorm = 84.9149, GNorm = 0.6456, lr_0 = 1.4507e-04
Loss = 8.1354e-02, PNorm = 84.9174, GNorm = 0.6544, lr_0 = 1.4497e-04
Loss = 7.8291e-02, PNorm = 84.9157, GNorm = 0.4803, lr_0 = 1.4487e-04
Loss = 7.9497e-02, PNorm = 84.9151, GNorm = 0.5739, lr_0 = 1.4477e-04
Loss = 7.6757e-02, PNorm = 84.9170, GNorm = 0.6937, lr_0 = 1.4467e-04
Loss = 8.5906e-02, PNorm = 84.9212, GNorm = 0.7778, lr_0 = 1.4457e-04
Loss = 7.4251e-02, PNorm = 84.9239, GNorm = 0.5861, lr_0 = 1.4447e-04
Loss = 8.3285e-02, PNorm = 84.9273, GNorm = 0.5747, lr_0 = 1.4438e-04
Loss = 7.5437e-02, PNorm = 84.9295, GNorm = 0.6005, lr_0 = 1.4428e-04
Loss = 7.7925e-02, PNorm = 84.9329, GNorm = 0.6394, lr_0 = 1.4418e-04
Loss = 8.3780e-02, PNorm = 84.9366, GNorm = 0.5799, lr_0 = 1.4408e-04
Loss = 8.9457e-02, PNorm = 84.9376, GNorm = 0.7453, lr_0 = 1.4398e-04
Loss = 8.7174e-02, PNorm = 84.9404, GNorm = 0.6598, lr_0 = 1.4388e-04
Loss = 7.9413e-02, PNorm = 84.9425, GNorm = 0.7384, lr_0 = 1.4378e-04
Loss = 8.3682e-02, PNorm = 84.9437, GNorm = 0.7103, lr_0 = 1.4368e-04
Loss = 7.5675e-02, PNorm = 84.9461, GNorm = 0.6587, lr_0 = 1.4359e-04
Loss = 9.0216e-02, PNorm = 84.9474, GNorm = 0.6289, lr_0 = 1.4349e-04
Loss = 8.5370e-02, PNorm = 84.9496, GNorm = 0.7783, lr_0 = 1.4339e-04
Loss = 8.5571e-02, PNorm = 84.9539, GNorm = 0.6930, lr_0 = 1.4329e-04
Loss = 1.0074e-01, PNorm = 84.9578, GNorm = 0.6734, lr_0 = 1.4319e-04
Loss = 7.3891e-02, PNorm = 84.9591, GNorm = 0.4783, lr_0 = 1.4310e-04
Loss = 8.4494e-02, PNorm = 84.9611, GNorm = 0.6853, lr_0 = 1.4300e-04
Loss = 7.5599e-02, PNorm = 84.9608, GNorm = 0.5241, lr_0 = 1.4290e-04
Loss = 9.7598e-02, PNorm = 84.9639, GNorm = 0.5475, lr_0 = 1.4280e-04
Loss = 7.6382e-02, PNorm = 84.9672, GNorm = 0.5804, lr_0 = 1.4270e-04
Loss = 7.6020e-02, PNorm = 84.9702, GNorm = 0.6597, lr_0 = 1.4261e-04
Loss = 7.8305e-02, PNorm = 84.9737, GNorm = 0.5543, lr_0 = 1.4251e-04
Loss = 8.0760e-02, PNorm = 84.9778, GNorm = 0.5345, lr_0 = 1.4241e-04
Loss = 7.9776e-02, PNorm = 84.9801, GNorm = 0.5943, lr_0 = 1.4231e-04
Loss = 8.1175e-02, PNorm = 84.9808, GNorm = 0.8036, lr_0 = 1.4222e-04
Loss = 8.4540e-02, PNorm = 84.9823, GNorm = 0.6199, lr_0 = 1.4212e-04
Loss = 8.7083e-02, PNorm = 84.9850, GNorm = 0.6901, lr_0 = 1.4202e-04
Loss = 8.9327e-02, PNorm = 84.9871, GNorm = 0.5935, lr_0 = 1.4192e-04
Loss = 7.6206e-02, PNorm = 84.9892, GNorm = 0.6498, lr_0 = 1.4183e-04
Loss = 7.9287e-02, PNorm = 84.9942, GNorm = 0.5869, lr_0 = 1.4173e-04
Loss = 8.5495e-02, PNorm = 84.9976, GNorm = 0.5739, lr_0 = 1.4163e-04
Loss = 7.9329e-02, PNorm = 85.0011, GNorm = 0.8121, lr_0 = 1.4153e-04
Loss = 8.4376e-02, PNorm = 85.0049, GNorm = 0.7002, lr_0 = 1.4144e-04
Loss = 7.7346e-02, PNorm = 85.0064, GNorm = 0.8500, lr_0 = 1.4134e-04
Loss = 7.1036e-02, PNorm = 85.0072, GNorm = 0.4834, lr_0 = 1.4124e-04
Loss = 8.2358e-02, PNorm = 85.0075, GNorm = 0.5954, lr_0 = 1.4115e-04
Loss = 7.8858e-02, PNorm = 85.0098, GNorm = 0.5881, lr_0 = 1.4105e-04
Loss = 7.2683e-02, PNorm = 85.0129, GNorm = 0.6274, lr_0 = 1.4095e-04
Loss = 8.0025e-02, PNorm = 85.0139, GNorm = 0.5384, lr_0 = 1.4086e-04
Loss = 8.4131e-02, PNorm = 85.0158, GNorm = 0.6503, lr_0 = 1.4076e-04
Loss = 7.8990e-02, PNorm = 85.0191, GNorm = 0.5986, lr_0 = 1.4066e-04
Loss = 7.9851e-02, PNorm = 85.0204, GNorm = 0.7837, lr_0 = 1.4057e-04
Loss = 7.4832e-02, PNorm = 85.0213, GNorm = 0.8077, lr_0 = 1.4047e-04
Loss = 8.6062e-02, PNorm = 85.0242, GNorm = 0.8007, lr_0 = 1.4038e-04
Loss = 8.4385e-02, PNorm = 85.0261, GNorm = 0.5380, lr_0 = 1.4028e-04
Loss = 8.5883e-02, PNorm = 85.0281, GNorm = 0.6528, lr_0 = 1.4018e-04
Loss = 8.2139e-02, PNorm = 85.0287, GNorm = 0.6215, lr_0 = 1.4009e-04
Loss = 7.9788e-02, PNorm = 85.0303, GNorm = 0.7323, lr_0 = 1.3999e-04
Loss = 8.7208e-02, PNorm = 85.0333, GNorm = 0.6302, lr_0 = 1.3990e-04
Loss = 7.7362e-02, PNorm = 85.0361, GNorm = 0.5627, lr_0 = 1.3980e-04
Loss = 7.4137e-02, PNorm = 85.0398, GNorm = 0.6615, lr_0 = 1.3970e-04
Loss = 8.7858e-02, PNorm = 85.0421, GNorm = 0.5272, lr_0 = 1.3961e-04
Loss = 8.3531e-02, PNorm = 85.0458, GNorm = 0.7181, lr_0 = 1.3951e-04
Loss = 8.4056e-02, PNorm = 85.0486, GNorm = 0.7273, lr_0 = 1.3942e-04
Loss = 8.1235e-02, PNorm = 85.0507, GNorm = 0.8349, lr_0 = 1.3932e-04
Loss = 7.9283e-02, PNorm = 85.0534, GNorm = 0.5796, lr_0 = 1.3923e-04
Loss = 8.8613e-02, PNorm = 85.0551, GNorm = 0.7507, lr_0 = 1.3913e-04
Loss = 8.1903e-02, PNorm = 85.0575, GNorm = 0.5017, lr_0 = 1.3904e-04
Loss = 9.3936e-02, PNorm = 85.0594, GNorm = 0.7691, lr_0 = 1.3894e-04
Validation mae = 0.231430
Epoch 26
Loss = 8.0341e-02, PNorm = 85.0603, GNorm = 0.5807, lr_0 = 1.3884e-04
Loss = 7.8505e-02, PNorm = 85.0603, GNorm = 0.6307, lr_0 = 1.3875e-04
Loss = 7.8508e-02, PNorm = 85.0612, GNorm = 0.6957, lr_0 = 1.3865e-04
Loss = 7.9532e-02, PNorm = 85.0652, GNorm = 0.4800, lr_0 = 1.3856e-04
Loss = 8.9384e-02, PNorm = 85.0665, GNorm = 0.7429, lr_0 = 1.3846e-04
Loss = 7.9827e-02, PNorm = 85.0665, GNorm = 0.5016, lr_0 = 1.3837e-04
Loss = 6.7893e-02, PNorm = 85.0686, GNorm = 0.4716, lr_0 = 1.3828e-04
Loss = 8.5267e-02, PNorm = 85.0717, GNorm = 0.5725, lr_0 = 1.3818e-04
Loss = 7.4650e-02, PNorm = 85.0732, GNorm = 0.8035, lr_0 = 1.3809e-04
Loss = 8.0497e-02, PNorm = 85.0769, GNorm = 0.8294, lr_0 = 1.3799e-04
Loss = 7.0734e-02, PNorm = 85.0803, GNorm = 0.8404, lr_0 = 1.3790e-04
Loss = 7.7909e-02, PNorm = 85.0821, GNorm = 0.6817, lr_0 = 1.3780e-04
Loss = 8.0138e-02, PNorm = 85.0847, GNorm = 0.6923, lr_0 = 1.3771e-04
Loss = 7.7277e-02, PNorm = 85.0880, GNorm = 0.6378, lr_0 = 1.3761e-04
Loss = 8.2231e-02, PNorm = 85.0910, GNorm = 0.5245, lr_0 = 1.3752e-04
Loss = 8.6655e-02, PNorm = 85.0907, GNorm = 0.6597, lr_0 = 1.3742e-04
Loss = 8.2414e-02, PNorm = 85.0951, GNorm = 0.6155, lr_0 = 1.3733e-04
Loss = 7.8462e-02, PNorm = 85.0992, GNorm = 0.5444, lr_0 = 1.3724e-04
Loss = 7.5306e-02, PNorm = 85.1035, GNorm = 0.7018, lr_0 = 1.3714e-04
Loss = 7.9541e-02, PNorm = 85.1067, GNorm = 0.6273, lr_0 = 1.3705e-04
Loss = 8.0180e-02, PNorm = 85.1096, GNorm = 0.7626, lr_0 = 1.3695e-04
Loss = 7.4298e-02, PNorm = 85.1121, GNorm = 0.7945, lr_0 = 1.3686e-04
Loss = 7.6718e-02, PNorm = 85.1133, GNorm = 0.6608, lr_0 = 1.3677e-04
Loss = 7.4792e-02, PNorm = 85.1137, GNorm = 0.8324, lr_0 = 1.3667e-04
Loss = 7.2259e-02, PNorm = 85.1143, GNorm = 0.5110, lr_0 = 1.3658e-04
Loss = 8.2588e-02, PNorm = 85.1158, GNorm = 0.8441, lr_0 = 1.3649e-04
Loss = 6.8955e-02, PNorm = 85.1180, GNorm = 0.5733, lr_0 = 1.3639e-04
Loss = 6.6932e-02, PNorm = 85.1192, GNorm = 0.7563, lr_0 = 1.3630e-04
Loss = 6.5991e-02, PNorm = 85.1209, GNorm = 0.5376, lr_0 = 1.3621e-04
Loss = 7.6896e-02, PNorm = 85.1243, GNorm = 0.5931, lr_0 = 1.3611e-04
Loss = 7.9689e-02, PNorm = 85.1253, GNorm = 0.7149, lr_0 = 1.3602e-04
Loss = 7.1308e-02, PNorm = 85.1262, GNorm = 0.6427, lr_0 = 1.3593e-04
Loss = 8.1354e-02, PNorm = 85.1294, GNorm = 0.9434, lr_0 = 1.3583e-04
Loss = 9.1634e-02, PNorm = 85.1306, GNorm = 0.7759, lr_0 = 1.3574e-04
Loss = 7.7890e-02, PNorm = 85.1324, GNorm = 0.6392, lr_0 = 1.3565e-04
Loss = 7.7509e-02, PNorm = 85.1357, GNorm = 0.8902, lr_0 = 1.3555e-04
Loss = 8.2961e-02, PNorm = 85.1384, GNorm = 0.6796, lr_0 = 1.3546e-04
Loss = 8.9970e-02, PNorm = 85.1403, GNorm = 0.5956, lr_0 = 1.3537e-04
Loss = 8.3002e-02, PNorm = 85.1428, GNorm = 0.6622, lr_0 = 1.3528e-04
Loss = 8.0434e-02, PNorm = 85.1454, GNorm = 0.4669, lr_0 = 1.3518e-04
Loss = 7.2504e-02, PNorm = 85.1473, GNorm = 0.5878, lr_0 = 1.3509e-04
Loss = 7.2347e-02, PNorm = 85.1480, GNorm = 0.4851, lr_0 = 1.3500e-04
Loss = 7.2239e-02, PNorm = 85.1476, GNorm = 0.7501, lr_0 = 1.3491e-04
Loss = 8.9533e-02, PNorm = 85.1508, GNorm = 0.6521, lr_0 = 1.3481e-04
Loss = 9.5723e-02, PNorm = 85.1506, GNorm = 0.5434, lr_0 = 1.3472e-04
Loss = 7.6391e-02, PNorm = 85.1511, GNorm = 0.5524, lr_0 = 1.3463e-04
Loss = 8.8526e-02, PNorm = 85.1526, GNorm = 0.5547, lr_0 = 1.3454e-04
Loss = 8.0342e-02, PNorm = 85.1532, GNorm = 0.7402, lr_0 = 1.3444e-04
Loss = 9.1579e-02, PNorm = 85.1553, GNorm = 0.6206, lr_0 = 1.3435e-04
Loss = 8.8829e-02, PNorm = 85.1574, GNorm = 0.7288, lr_0 = 1.3426e-04
Loss = 7.6162e-02, PNorm = 85.1607, GNorm = 0.6548, lr_0 = 1.3417e-04
Loss = 7.8754e-02, PNorm = 85.1606, GNorm = 0.5316, lr_0 = 1.3408e-04
Loss = 8.2543e-02, PNorm = 85.1617, GNorm = 0.6626, lr_0 = 1.3398e-04
Loss = 8.0966e-02, PNorm = 85.1639, GNorm = 0.5750, lr_0 = 1.3389e-04
Loss = 7.9080e-02, PNorm = 85.1675, GNorm = 0.5522, lr_0 = 1.3380e-04
Loss = 7.3669e-02, PNorm = 85.1708, GNorm = 0.6553, lr_0 = 1.3371e-04
Loss = 7.7816e-02, PNorm = 85.1707, GNorm = 0.5004, lr_0 = 1.3362e-04
Loss = 7.3637e-02, PNorm = 85.1699, GNorm = 0.6435, lr_0 = 1.3353e-04
Loss = 9.4209e-02, PNorm = 85.1711, GNorm = 0.9865, lr_0 = 1.3343e-04
Loss = 7.9605e-02, PNorm = 85.1724, GNorm = 0.6350, lr_0 = 1.3334e-04
Loss = 8.1524e-02, PNorm = 85.1737, GNorm = 0.7171, lr_0 = 1.3325e-04
Loss = 8.7286e-02, PNorm = 85.1758, GNorm = 0.8408, lr_0 = 1.3316e-04
Loss = 7.5906e-02, PNorm = 85.1780, GNorm = 0.5134, lr_0 = 1.3307e-04
Loss = 7.8863e-02, PNorm = 85.1816, GNorm = 0.7427, lr_0 = 1.3298e-04
Loss = 7.3432e-02, PNorm = 85.1858, GNorm = 0.5620, lr_0 = 1.3289e-04
Loss = 7.7050e-02, PNorm = 85.1901, GNorm = 0.5559, lr_0 = 1.3280e-04
Loss = 8.5432e-02, PNorm = 85.1931, GNorm = 0.6124, lr_0 = 1.3270e-04
Loss = 7.8137e-02, PNorm = 85.1951, GNorm = 0.7342, lr_0 = 1.3261e-04
Loss = 7.3338e-02, PNorm = 85.1966, GNorm = 0.5650, lr_0 = 1.3252e-04
Loss = 7.5031e-02, PNorm = 85.1973, GNorm = 0.7803, lr_0 = 1.3243e-04
Loss = 8.4990e-02, PNorm = 85.1994, GNorm = 0.6858, lr_0 = 1.3234e-04
Loss = 7.1578e-02, PNorm = 85.2001, GNorm = 0.7528, lr_0 = 1.3225e-04
Loss = 7.9289e-02, PNorm = 85.2017, GNorm = 0.5102, lr_0 = 1.3216e-04
Loss = 7.5824e-02, PNorm = 85.2032, GNorm = 0.6154, lr_0 = 1.3207e-04
Loss = 8.2760e-02, PNorm = 85.2040, GNorm = 0.7096, lr_0 = 1.3198e-04
Loss = 7.6128e-02, PNorm = 85.2054, GNorm = 0.4838, lr_0 = 1.3189e-04
Loss = 8.3900e-02, PNorm = 85.2069, GNorm = 0.7798, lr_0 = 1.3180e-04
Loss = 8.6548e-02, PNorm = 85.2087, GNorm = 0.5519, lr_0 = 1.3171e-04
Loss = 7.6685e-02, PNorm = 85.2112, GNorm = 0.5618, lr_0 = 1.3162e-04
Loss = 7.8954e-02, PNorm = 85.2126, GNorm = 0.7561, lr_0 = 1.3153e-04
Loss = 8.3219e-02, PNorm = 85.2122, GNorm = 0.5758, lr_0 = 1.3144e-04
Loss = 8.0272e-02, PNorm = 85.2116, GNorm = 0.5677, lr_0 = 1.3135e-04
Loss = 7.5509e-02, PNorm = 85.2135, GNorm = 0.7268, lr_0 = 1.3126e-04
Loss = 7.0513e-02, PNorm = 85.2146, GNorm = 0.4331, lr_0 = 1.3117e-04
Loss = 8.4504e-02, PNorm = 85.2168, GNorm = 0.7940, lr_0 = 1.3108e-04
Loss = 8.0766e-02, PNorm = 85.2203, GNorm = 0.7366, lr_0 = 1.3099e-04
Loss = 8.4487e-02, PNorm = 85.2233, GNorm = 0.6595, lr_0 = 1.3090e-04
Loss = 8.4642e-02, PNorm = 85.2243, GNorm = 0.6151, lr_0 = 1.3081e-04
Loss = 7.7047e-02, PNorm = 85.2270, GNorm = 0.5860, lr_0 = 1.3072e-04
Loss = 8.5881e-02, PNorm = 85.2303, GNorm = 0.4735, lr_0 = 1.3063e-04
Loss = 8.5955e-02, PNorm = 85.2329, GNorm = 0.9525, lr_0 = 1.3054e-04
Loss = 8.5323e-02, PNorm = 85.2347, GNorm = 0.5280, lr_0 = 1.3045e-04
Loss = 8.3033e-02, PNorm = 85.2386, GNorm = 0.5531, lr_0 = 1.3036e-04
Loss = 6.6789e-02, PNorm = 85.2407, GNorm = 0.7522, lr_0 = 1.3027e-04
Loss = 8.3036e-02, PNorm = 85.2423, GNorm = 0.8725, lr_0 = 1.3018e-04
Loss = 8.0669e-02, PNorm = 85.2432, GNorm = 0.6778, lr_0 = 1.3009e-04
Loss = 7.6944e-02, PNorm = 85.2442, GNorm = 0.6346, lr_0 = 1.3000e-04
Loss = 7.4150e-02, PNorm = 85.2467, GNorm = 0.4746, lr_0 = 1.2992e-04
Loss = 7.2874e-02, PNorm = 85.2490, GNorm = 0.8282, lr_0 = 1.2983e-04
Loss = 8.3060e-02, PNorm = 85.2517, GNorm = 0.6768, lr_0 = 1.2974e-04
Loss = 8.4398e-02, PNorm = 85.2535, GNorm = 0.8096, lr_0 = 1.2965e-04
Loss = 7.3498e-02, PNorm = 85.2557, GNorm = 0.7208, lr_0 = 1.2956e-04
Loss = 8.5416e-02, PNorm = 85.2582, GNorm = 0.8990, lr_0 = 1.2947e-04
Loss = 8.0709e-02, PNorm = 85.2600, GNorm = 0.6116, lr_0 = 1.2938e-04
Loss = 7.4336e-02, PNorm = 85.2607, GNorm = 0.8169, lr_0 = 1.2929e-04
Loss = 8.4692e-02, PNorm = 85.2618, GNorm = 0.8149, lr_0 = 1.2921e-04
Loss = 8.4698e-02, PNorm = 85.2640, GNorm = 0.8820, lr_0 = 1.2912e-04
Loss = 7.5514e-02, PNorm = 85.2660, GNorm = 0.6520, lr_0 = 1.2903e-04
Loss = 8.7337e-02, PNorm = 85.2692, GNorm = 0.8373, lr_0 = 1.2894e-04
Loss = 8.4756e-02, PNorm = 85.2727, GNorm = 0.6726, lr_0 = 1.2885e-04
Loss = 8.2154e-02, PNorm = 85.2742, GNorm = 0.5428, lr_0 = 1.2876e-04
Loss = 8.6965e-02, PNorm = 85.2750, GNorm = 0.6635, lr_0 = 1.2867e-04
Loss = 7.5440e-02, PNorm = 85.2764, GNorm = 0.6135, lr_0 = 1.2859e-04
Loss = 7.7414e-02, PNorm = 85.2788, GNorm = 0.6079, lr_0 = 1.2850e-04
Loss = 7.7425e-02, PNorm = 85.2818, GNorm = 0.5138, lr_0 = 1.2841e-04
Loss = 8.8050e-02, PNorm = 85.2852, GNorm = 0.5856, lr_0 = 1.2832e-04
Loss = 7.9104e-02, PNorm = 85.2875, GNorm = 0.7368, lr_0 = 1.2823e-04
Loss = 7.2592e-02, PNorm = 85.2905, GNorm = 0.5762, lr_0 = 1.2815e-04
Loss = 7.9845e-02, PNorm = 85.2911, GNorm = 0.5810, lr_0 = 1.2806e-04
Loss = 7.1460e-02, PNorm = 85.2923, GNorm = 0.6864, lr_0 = 1.2797e-04
Validation mae = 0.229696
Epoch 27
Loss = 7.1711e-02, PNorm = 85.2931, GNorm = 0.6259, lr_0 = 1.2788e-04
Loss = 7.2424e-02, PNorm = 85.2933, GNorm = 0.7167, lr_0 = 1.2780e-04
Loss = 6.5847e-02, PNorm = 85.2948, GNorm = 0.4828, lr_0 = 1.2771e-04
Loss = 7.8851e-02, PNorm = 85.2963, GNorm = 0.6274, lr_0 = 1.2762e-04
Loss = 8.7603e-02, PNorm = 85.2979, GNorm = 0.5874, lr_0 = 1.2753e-04
Loss = 8.7509e-02, PNorm = 85.3000, GNorm = 1.0662, lr_0 = 1.2745e-04
Loss = 7.6904e-02, PNorm = 85.3021, GNorm = 0.7371, lr_0 = 1.2736e-04
Loss = 7.9400e-02, PNorm = 85.3041, GNorm = 0.5431, lr_0 = 1.2727e-04
Loss = 8.4014e-02, PNorm = 85.3061, GNorm = 0.6922, lr_0 = 1.2718e-04
Loss = 7.8745e-02, PNorm = 85.3091, GNorm = 0.6443, lr_0 = 1.2710e-04
Loss = 7.3713e-02, PNorm = 85.3138, GNorm = 0.5133, lr_0 = 1.2701e-04
Loss = 7.7637e-02, PNorm = 85.3151, GNorm = 0.5763, lr_0 = 1.2692e-04
Loss = 7.7597e-02, PNorm = 85.3158, GNorm = 0.6416, lr_0 = 1.2684e-04
Loss = 8.4537e-02, PNorm = 85.3163, GNorm = 0.5070, lr_0 = 1.2675e-04
Loss = 7.9977e-02, PNorm = 85.3182, GNorm = 0.6561, lr_0 = 1.2666e-04
Loss = 7.3515e-02, PNorm = 85.3217, GNorm = 0.5585, lr_0 = 1.2658e-04
Loss = 6.6613e-02, PNorm = 85.3249, GNorm = 0.4816, lr_0 = 1.2649e-04
Loss = 7.8450e-02, PNorm = 85.3264, GNorm = 0.8947, lr_0 = 1.2640e-04
Loss = 7.3839e-02, PNorm = 85.3281, GNorm = 0.6624, lr_0 = 1.2632e-04
Loss = 7.0020e-02, PNorm = 85.3287, GNorm = 0.7001, lr_0 = 1.2623e-04
Loss = 8.1407e-02, PNorm = 85.3298, GNorm = 0.6777, lr_0 = 1.2614e-04
Loss = 7.5325e-02, PNorm = 85.3310, GNorm = 0.5020, lr_0 = 1.2606e-04
Loss = 8.9760e-02, PNorm = 85.3340, GNorm = 0.7255, lr_0 = 1.2597e-04
Loss = 8.1339e-02, PNorm = 85.3364, GNorm = 0.6342, lr_0 = 1.2588e-04
Loss = 6.5152e-02, PNorm = 85.3368, GNorm = 0.6375, lr_0 = 1.2580e-04
Loss = 7.9386e-02, PNorm = 85.3375, GNorm = 0.4612, lr_0 = 1.2571e-04
Loss = 8.3483e-02, PNorm = 85.3406, GNorm = 0.7065, lr_0 = 1.2563e-04
Loss = 7.6052e-02, PNorm = 85.3436, GNorm = 0.7421, lr_0 = 1.2554e-04
Loss = 8.3143e-02, PNorm = 85.3447, GNorm = 0.6698, lr_0 = 1.2545e-04
Loss = 7.2345e-02, PNorm = 85.3468, GNorm = 0.6995, lr_0 = 1.2537e-04
Loss = 7.5467e-02, PNorm = 85.3489, GNorm = 0.6783, lr_0 = 1.2528e-04
Loss = 6.8318e-02, PNorm = 85.3513, GNorm = 0.4871, lr_0 = 1.2520e-04
Loss = 8.3541e-02, PNorm = 85.3536, GNorm = 0.7658, lr_0 = 1.2511e-04
Loss = 8.5248e-02, PNorm = 85.3555, GNorm = 0.6685, lr_0 = 1.2502e-04
Loss = 8.3312e-02, PNorm = 85.3565, GNorm = 0.7103, lr_0 = 1.2494e-04
Loss = 8.3291e-02, PNorm = 85.3577, GNorm = 0.5645, lr_0 = 1.2485e-04
Loss = 8.0408e-02, PNorm = 85.3575, GNorm = 0.5608, lr_0 = 1.2477e-04
Loss = 8.1131e-02, PNorm = 85.3578, GNorm = 0.6896, lr_0 = 1.2468e-04
Loss = 7.6061e-02, PNorm = 85.3599, GNorm = 0.6398, lr_0 = 1.2460e-04
Loss = 7.9067e-02, PNorm = 85.3629, GNorm = 0.5605, lr_0 = 1.2451e-04
Loss = 7.6526e-02, PNorm = 85.3659, GNorm = 0.6999, lr_0 = 1.2443e-04
Loss = 7.6368e-02, PNorm = 85.3683, GNorm = 0.5665, lr_0 = 1.2434e-04
Loss = 7.8712e-02, PNorm = 85.3693, GNorm = 0.5945, lr_0 = 1.2426e-04
Loss = 8.0984e-02, PNorm = 85.3696, GNorm = 0.6581, lr_0 = 1.2417e-04
Loss = 6.5885e-02, PNorm = 85.3717, GNorm = 0.8151, lr_0 = 1.2409e-04
Loss = 7.8542e-02, PNorm = 85.3732, GNorm = 0.6747, lr_0 = 1.2400e-04
Loss = 7.6677e-02, PNorm = 85.3745, GNorm = 0.6804, lr_0 = 1.2392e-04
Loss = 7.0569e-02, PNorm = 85.3764, GNorm = 0.7127, lr_0 = 1.2383e-04
Loss = 7.9626e-02, PNorm = 85.3782, GNorm = 0.5975, lr_0 = 1.2375e-04
Loss = 7.8354e-02, PNorm = 85.3804, GNorm = 0.7211, lr_0 = 1.2366e-04
Loss = 7.9183e-02, PNorm = 85.3834, GNorm = 0.6710, lr_0 = 1.2358e-04
Loss = 8.9303e-02, PNorm = 85.3859, GNorm = 0.7589, lr_0 = 1.2349e-04
Loss = 7.4253e-02, PNorm = 85.3887, GNorm = 0.4792, lr_0 = 1.2341e-04
Loss = 8.7325e-02, PNorm = 85.3910, GNorm = 0.5176, lr_0 = 1.2332e-04
Loss = 7.1830e-02, PNorm = 85.3924, GNorm = 0.7773, lr_0 = 1.2324e-04
Loss = 8.3187e-02, PNorm = 85.3965, GNorm = 0.7840, lr_0 = 1.2315e-04
Loss = 8.1931e-02, PNorm = 85.3998, GNorm = 0.5580, lr_0 = 1.2307e-04
Loss = 6.9577e-02, PNorm = 85.4023, GNorm = 0.5072, lr_0 = 1.2298e-04
Loss = 8.1682e-02, PNorm = 85.4041, GNorm = 0.5743, lr_0 = 1.2290e-04
Loss = 7.7542e-02, PNorm = 85.4066, GNorm = 0.5859, lr_0 = 1.2282e-04
Loss = 7.8117e-02, PNorm = 85.4089, GNorm = 0.6462, lr_0 = 1.2273e-04
Loss = 7.1452e-02, PNorm = 85.4106, GNorm = 0.8318, lr_0 = 1.2265e-04
Loss = 7.2380e-02, PNorm = 85.4123, GNorm = 0.5347, lr_0 = 1.2256e-04
Loss = 7.9755e-02, PNorm = 85.4135, GNorm = 0.6503, lr_0 = 1.2248e-04
Loss = 6.6704e-02, PNorm = 85.4154, GNorm = 0.6293, lr_0 = 1.2240e-04
Loss = 7.5947e-02, PNorm = 85.4176, GNorm = 0.5154, lr_0 = 1.2231e-04
Loss = 8.4012e-02, PNorm = 85.4202, GNorm = 0.8578, lr_0 = 1.2223e-04
Loss = 7.3316e-02, PNorm = 85.4205, GNorm = 0.4688, lr_0 = 1.2214e-04
Loss = 8.0097e-02, PNorm = 85.4227, GNorm = 0.6254, lr_0 = 1.2206e-04
Loss = 8.0436e-02, PNorm = 85.4253, GNorm = 0.6564, lr_0 = 1.2198e-04
Loss = 9.8207e-02, PNorm = 85.4273, GNorm = 0.8410, lr_0 = 1.2189e-04
Loss = 7.3369e-02, PNorm = 85.4270, GNorm = 0.7184, lr_0 = 1.2181e-04
Loss = 8.0653e-02, PNorm = 85.4277, GNorm = 0.6284, lr_0 = 1.2173e-04
Loss = 7.9567e-02, PNorm = 85.4301, GNorm = 0.6181, lr_0 = 1.2164e-04
Loss = 7.9551e-02, PNorm = 85.4327, GNorm = 0.5389, lr_0 = 1.2156e-04
Loss = 7.1434e-02, PNorm = 85.4349, GNorm = 0.6099, lr_0 = 1.2148e-04
Loss = 8.6298e-02, PNorm = 85.4360, GNorm = 0.7158, lr_0 = 1.2139e-04
Loss = 7.7147e-02, PNorm = 85.4372, GNorm = 0.5054, lr_0 = 1.2131e-04
Loss = 7.4664e-02, PNorm = 85.4391, GNorm = 0.5481, lr_0 = 1.2123e-04
Loss = 6.6886e-02, PNorm = 85.4418, GNorm = 0.6244, lr_0 = 1.2114e-04
Loss = 6.7464e-02, PNorm = 85.4437, GNorm = 0.8154, lr_0 = 1.2106e-04
Loss = 7.6020e-02, PNorm = 85.4448, GNorm = 0.5493, lr_0 = 1.2098e-04
Loss = 7.8560e-02, PNorm = 85.4446, GNorm = 0.7205, lr_0 = 1.2090e-04
Loss = 7.8171e-02, PNorm = 85.4476, GNorm = 0.5705, lr_0 = 1.2081e-04
Loss = 8.1634e-02, PNorm = 85.4501, GNorm = 0.5208, lr_0 = 1.2073e-04
Loss = 7.7814e-02, PNorm = 85.4494, GNorm = 0.5959, lr_0 = 1.2065e-04
Loss = 8.2067e-02, PNorm = 85.4502, GNorm = 0.5870, lr_0 = 1.2056e-04
Loss = 7.5097e-02, PNorm = 85.4522, GNorm = 0.4840, lr_0 = 1.2048e-04
Loss = 8.7256e-02, PNorm = 85.4551, GNorm = 0.7903, lr_0 = 1.2040e-04
Loss = 8.9374e-02, PNorm = 85.4594, GNorm = 0.5272, lr_0 = 1.2032e-04
Loss = 8.8977e-02, PNorm = 85.4617, GNorm = 0.6580, lr_0 = 1.2023e-04
Loss = 6.6596e-02, PNorm = 85.4632, GNorm = 0.6733, lr_0 = 1.2015e-04
Loss = 7.6345e-02, PNorm = 85.4638, GNorm = 0.5962, lr_0 = 1.2007e-04
Loss = 7.1253e-02, PNorm = 85.4660, GNorm = 0.5921, lr_0 = 1.1999e-04
Loss = 7.2894e-02, PNorm = 85.4683, GNorm = 0.5239, lr_0 = 1.1991e-04
Loss = 7.7283e-02, PNorm = 85.4701, GNorm = 0.6080, lr_0 = 1.1982e-04
Loss = 8.0899e-02, PNorm = 85.4716, GNorm = 0.6779, lr_0 = 1.1974e-04
Loss = 8.0238e-02, PNorm = 85.4715, GNorm = 1.2105, lr_0 = 1.1966e-04
Loss = 7.3999e-02, PNorm = 85.4728, GNorm = 0.5824, lr_0 = 1.1958e-04
Loss = 7.9827e-02, PNorm = 85.4752, GNorm = 0.8967, lr_0 = 1.1950e-04
Loss = 7.7171e-02, PNorm = 85.4751, GNorm = 0.6392, lr_0 = 1.1941e-04
Loss = 8.8622e-02, PNorm = 85.4759, GNorm = 0.6797, lr_0 = 1.1933e-04
Loss = 7.7015e-02, PNorm = 85.4781, GNorm = 0.6561, lr_0 = 1.1925e-04
Loss = 7.9190e-02, PNorm = 85.4790, GNorm = 0.5947, lr_0 = 1.1917e-04
Loss = 7.8895e-02, PNorm = 85.4800, GNorm = 0.7182, lr_0 = 1.1909e-04
Loss = 8.8504e-02, PNorm = 85.4807, GNorm = 0.4843, lr_0 = 1.1901e-04
Loss = 7.7614e-02, PNorm = 85.4818, GNorm = 0.5948, lr_0 = 1.1892e-04
Loss = 9.3378e-02, PNorm = 85.4857, GNorm = 0.5795, lr_0 = 1.1884e-04
Loss = 6.8028e-02, PNorm = 85.4887, GNorm = 0.6031, lr_0 = 1.1876e-04
Loss = 8.2303e-02, PNorm = 85.4910, GNorm = 0.5730, lr_0 = 1.1868e-04
Loss = 8.2766e-02, PNorm = 85.4946, GNorm = 0.7296, lr_0 = 1.1860e-04
Loss = 7.0685e-02, PNorm = 85.4954, GNorm = 0.7228, lr_0 = 1.1852e-04
Loss = 8.8826e-02, PNorm = 85.4950, GNorm = 0.5097, lr_0 = 1.1844e-04
Loss = 8.3266e-02, PNorm = 85.4942, GNorm = 0.7549, lr_0 = 1.1835e-04
Loss = 8.5263e-02, PNorm = 85.4967, GNorm = 0.7631, lr_0 = 1.1827e-04
Loss = 6.5771e-02, PNorm = 85.4977, GNorm = 0.6405, lr_0 = 1.1819e-04
Loss = 8.2614e-02, PNorm = 85.4989, GNorm = 0.7052, lr_0 = 1.1811e-04
Loss = 6.9716e-02, PNorm = 85.5009, GNorm = 0.5036, lr_0 = 1.1803e-04
Loss = 7.2876e-02, PNorm = 85.5037, GNorm = 0.6436, lr_0 = 1.1795e-04
Loss = 7.3606e-02, PNorm = 85.5052, GNorm = 0.6462, lr_0 = 1.1787e-04
Validation mae = 0.225568
Epoch 28
Loss = 7.6859e-02, PNorm = 85.5067, GNorm = 0.7763, lr_0 = 1.1779e-04
Loss = 8.4485e-02, PNorm = 85.5106, GNorm = 0.6656, lr_0 = 1.1771e-04
Loss = 6.8183e-02, PNorm = 85.5133, GNorm = 0.4904, lr_0 = 1.1763e-04
Loss = 7.4561e-02, PNorm = 85.5139, GNorm = 0.6570, lr_0 = 1.1755e-04
Loss = 7.3656e-02, PNorm = 85.5149, GNorm = 0.8855, lr_0 = 1.1747e-04
Loss = 7.6697e-02, PNorm = 85.5157, GNorm = 0.8704, lr_0 = 1.1739e-04
Loss = 8.4622e-02, PNorm = 85.5189, GNorm = 0.7047, lr_0 = 1.1730e-04
Loss = 7.6613e-02, PNorm = 85.5215, GNorm = 0.6602, lr_0 = 1.1722e-04
Loss = 7.5975e-02, PNorm = 85.5227, GNorm = 0.6338, lr_0 = 1.1714e-04
Loss = 8.6224e-02, PNorm = 85.5256, GNorm = 0.5154, lr_0 = 1.1706e-04
Loss = 7.1688e-02, PNorm = 85.5298, GNorm = 0.6148, lr_0 = 1.1698e-04
Loss = 6.9176e-02, PNorm = 85.5329, GNorm = 0.7105, lr_0 = 1.1690e-04
Loss = 8.3842e-02, PNorm = 85.5364, GNorm = 0.7381, lr_0 = 1.1682e-04
Loss = 7.0670e-02, PNorm = 85.5376, GNorm = 0.5425, lr_0 = 1.1674e-04
Loss = 7.6863e-02, PNorm = 85.5379, GNorm = 0.6855, lr_0 = 1.1666e-04
Loss = 8.1411e-02, PNorm = 85.5403, GNorm = 0.6284, lr_0 = 1.1658e-04
Loss = 7.6902e-02, PNorm = 85.5422, GNorm = 0.6438, lr_0 = 1.1650e-04
Loss = 6.5446e-02, PNorm = 85.5434, GNorm = 0.6285, lr_0 = 1.1642e-04
Loss = 7.4697e-02, PNorm = 85.5444, GNorm = 0.6601, lr_0 = 1.1634e-04
Loss = 7.0858e-02, PNorm = 85.5436, GNorm = 0.5205, lr_0 = 1.1626e-04
Loss = 7.0242e-02, PNorm = 85.5445, GNorm = 0.4837, lr_0 = 1.1618e-04
Loss = 8.1091e-02, PNorm = 85.5465, GNorm = 0.8043, lr_0 = 1.1611e-04
Loss = 6.8739e-02, PNorm = 85.5481, GNorm = 0.5476, lr_0 = 1.1603e-04
Loss = 6.5925e-02, PNorm = 85.5504, GNorm = 0.5468, lr_0 = 1.1595e-04
Loss = 7.2169e-02, PNorm = 85.5526, GNorm = 0.7207, lr_0 = 1.1587e-04
Loss = 7.4880e-02, PNorm = 85.5547, GNorm = 0.5604, lr_0 = 1.1579e-04
Loss = 7.3806e-02, PNorm = 85.5557, GNorm = 0.6331, lr_0 = 1.1571e-04
Loss = 8.8621e-02, PNorm = 85.5569, GNorm = 0.5624, lr_0 = 1.1563e-04
Loss = 7.3776e-02, PNorm = 85.5574, GNorm = 0.8101, lr_0 = 1.1555e-04
Loss = 7.1834e-02, PNorm = 85.5588, GNorm = 0.5272, lr_0 = 1.1547e-04
Loss = 7.1869e-02, PNorm = 85.5605, GNorm = 0.5518, lr_0 = 1.1539e-04
Loss = 8.1613e-02, PNorm = 85.5636, GNorm = 0.7653, lr_0 = 1.1531e-04
Loss = 7.0773e-02, PNorm = 85.5661, GNorm = 0.6522, lr_0 = 1.1523e-04
Loss = 8.0997e-02, PNorm = 85.5675, GNorm = 0.6283, lr_0 = 1.1515e-04
Loss = 6.9592e-02, PNorm = 85.5685, GNorm = 0.7206, lr_0 = 1.1508e-04
Loss = 6.1564e-02, PNorm = 85.5697, GNorm = 0.5071, lr_0 = 1.1500e-04
Loss = 7.3324e-02, PNorm = 85.5706, GNorm = 0.7369, lr_0 = 1.1492e-04
Loss = 7.4778e-02, PNorm = 85.5713, GNorm = 0.5463, lr_0 = 1.1484e-04
Loss = 8.0028e-02, PNorm = 85.5732, GNorm = 0.7268, lr_0 = 1.1476e-04
Loss = 8.6757e-02, PNorm = 85.5748, GNorm = 0.7689, lr_0 = 1.1468e-04
Loss = 7.6772e-02, PNorm = 85.5753, GNorm = 0.5429, lr_0 = 1.1460e-04
Loss = 7.7282e-02, PNorm = 85.5767, GNorm = 0.6428, lr_0 = 1.1452e-04
Loss = 7.8058e-02, PNorm = 85.5781, GNorm = 0.7676, lr_0 = 1.1445e-04
Loss = 7.9998e-02, PNorm = 85.5790, GNorm = 0.5936, lr_0 = 1.1437e-04
Loss = 7.9179e-02, PNorm = 85.5793, GNorm = 0.5472, lr_0 = 1.1429e-04
Loss = 7.3145e-02, PNorm = 85.5817, GNorm = 0.9519, lr_0 = 1.1421e-04
Loss = 7.0069e-02, PNorm = 85.5837, GNorm = 0.4168, lr_0 = 1.1413e-04
Loss = 7.5423e-02, PNorm = 85.5852, GNorm = 0.4723, lr_0 = 1.1405e-04
Loss = 7.1120e-02, PNorm = 85.5875, GNorm = 0.6420, lr_0 = 1.1398e-04
Loss = 7.2784e-02, PNorm = 85.5898, GNorm = 0.8061, lr_0 = 1.1390e-04
Loss = 7.2547e-02, PNorm = 85.5927, GNorm = 0.6241, lr_0 = 1.1382e-04
Loss = 7.2915e-02, PNorm = 85.5949, GNorm = 0.7107, lr_0 = 1.1374e-04
Loss = 7.9698e-02, PNorm = 85.5959, GNorm = 0.5202, lr_0 = 1.1366e-04
Loss = 7.4597e-02, PNorm = 85.5975, GNorm = 0.5368, lr_0 = 1.1359e-04
Loss = 7.8152e-02, PNorm = 85.5980, GNorm = 0.5954, lr_0 = 1.1351e-04
Loss = 5.9339e-02, PNorm = 85.5992, GNorm = 0.5754, lr_0 = 1.1343e-04
Loss = 7.3167e-02, PNorm = 85.6003, GNorm = 0.6786, lr_0 = 1.1335e-04
Loss = 8.4477e-02, PNorm = 85.6016, GNorm = 0.6031, lr_0 = 1.1328e-04
Loss = 7.1913e-02, PNorm = 85.6024, GNorm = 0.7343, lr_0 = 1.1320e-04
Loss = 7.7760e-02, PNorm = 85.6045, GNorm = 0.5950, lr_0 = 1.1312e-04
Loss = 6.9658e-02, PNorm = 85.6071, GNorm = 0.5758, lr_0 = 1.1304e-04
Loss = 8.2282e-02, PNorm = 85.6100, GNorm = 0.5920, lr_0 = 1.1297e-04
Loss = 6.5733e-02, PNorm = 85.6121, GNorm = 0.7264, lr_0 = 1.1289e-04
Loss = 8.4316e-02, PNorm = 85.6128, GNorm = 0.5484, lr_0 = 1.1281e-04
Loss = 7.9298e-02, PNorm = 85.6158, GNorm = 0.4717, lr_0 = 1.1273e-04
Loss = 7.6813e-02, PNorm = 85.6179, GNorm = 0.5412, lr_0 = 1.1266e-04
Loss = 7.9986e-02, PNorm = 85.6202, GNorm = 0.4827, lr_0 = 1.1258e-04
Loss = 6.9850e-02, PNorm = 85.6223, GNorm = 0.5639, lr_0 = 1.1250e-04
Loss = 7.5148e-02, PNorm = 85.6224, GNorm = 0.7800, lr_0 = 1.1243e-04
Loss = 7.9969e-02, PNorm = 85.6227, GNorm = 0.8218, lr_0 = 1.1235e-04
Loss = 7.4852e-02, PNorm = 85.6237, GNorm = 0.7879, lr_0 = 1.1227e-04
Loss = 7.8648e-02, PNorm = 85.6258, GNorm = 0.6198, lr_0 = 1.1219e-04
Loss = 7.7324e-02, PNorm = 85.6272, GNorm = 0.6444, lr_0 = 1.1212e-04
Loss = 7.8820e-02, PNorm = 85.6292, GNorm = 0.6421, lr_0 = 1.1204e-04
Loss = 8.5432e-02, PNorm = 85.6311, GNorm = 0.7667, lr_0 = 1.1196e-04
Loss = 7.1815e-02, PNorm = 85.6328, GNorm = 0.4723, lr_0 = 1.1189e-04
Loss = 7.2510e-02, PNorm = 85.6343, GNorm = 0.5568, lr_0 = 1.1181e-04
Loss = 7.4437e-02, PNorm = 85.6359, GNorm = 0.4587, lr_0 = 1.1173e-04
Loss = 8.1336e-02, PNorm = 85.6389, GNorm = 0.6907, lr_0 = 1.1166e-04
Loss = 8.8224e-02, PNorm = 85.6408, GNorm = 0.6162, lr_0 = 1.1158e-04
Loss = 6.4751e-02, PNorm = 85.6424, GNorm = 0.5187, lr_0 = 1.1150e-04
Loss = 7.8140e-02, PNorm = 85.6414, GNorm = 0.7099, lr_0 = 1.1143e-04
Loss = 7.8655e-02, PNorm = 85.6429, GNorm = 0.7950, lr_0 = 1.1135e-04
Loss = 7.6230e-02, PNorm = 85.6454, GNorm = 0.5950, lr_0 = 1.1128e-04
Loss = 7.6215e-02, PNorm = 85.6476, GNorm = 0.5771, lr_0 = 1.1120e-04
Loss = 8.8446e-02, PNorm = 85.6499, GNorm = 0.4797, lr_0 = 1.1112e-04
Loss = 7.9598e-02, PNorm = 85.6504, GNorm = 0.7348, lr_0 = 1.1105e-04
Loss = 7.4655e-02, PNorm = 85.6522, GNorm = 0.7731, lr_0 = 1.1097e-04
Loss = 8.8816e-02, PNorm = 85.6536, GNorm = 0.7833, lr_0 = 1.1089e-04
Loss = 7.9950e-02, PNorm = 85.6535, GNorm = 0.5184, lr_0 = 1.1082e-04
Loss = 7.2521e-02, PNorm = 85.6546, GNorm = 0.6710, lr_0 = 1.1074e-04
Loss = 7.9205e-02, PNorm = 85.6564, GNorm = 0.6492, lr_0 = 1.1067e-04
Loss = 7.8999e-02, PNorm = 85.6573, GNorm = 0.5692, lr_0 = 1.1059e-04
Loss = 8.1328e-02, PNorm = 85.6583, GNorm = 0.8112, lr_0 = 1.1052e-04
Loss = 8.3006e-02, PNorm = 85.6582, GNorm = 0.6610, lr_0 = 1.1044e-04
Loss = 7.3020e-02, PNorm = 85.6592, GNorm = 0.5932, lr_0 = 1.1036e-04
Loss = 7.6448e-02, PNorm = 85.6585, GNorm = 0.4461, lr_0 = 1.1029e-04
Loss = 9.4282e-02, PNorm = 85.6597, GNorm = 0.6072, lr_0 = 1.1021e-04
Loss = 8.0771e-02, PNorm = 85.6623, GNorm = 0.7698, lr_0 = 1.1014e-04
Loss = 8.1851e-02, PNorm = 85.6631, GNorm = 0.6349, lr_0 = 1.1006e-04
Loss = 7.9339e-02, PNorm = 85.6654, GNorm = 0.5320, lr_0 = 1.0999e-04
Loss = 6.8566e-02, PNorm = 85.6664, GNorm = 0.5642, lr_0 = 1.0991e-04
Loss = 7.3952e-02, PNorm = 85.6665, GNorm = 0.6080, lr_0 = 1.0984e-04
Loss = 7.4046e-02, PNorm = 85.6678, GNorm = 0.6806, lr_0 = 1.0976e-04
Loss = 6.8457e-02, PNorm = 85.6695, GNorm = 0.7085, lr_0 = 1.0969e-04
Loss = 8.0761e-02, PNorm = 85.6698, GNorm = 0.5517, lr_0 = 1.0961e-04
Loss = 7.7899e-02, PNorm = 85.6707, GNorm = 0.6189, lr_0 = 1.0954e-04
Loss = 8.0543e-02, PNorm = 85.6723, GNorm = 0.6143, lr_0 = 1.0946e-04
Loss = 7.7860e-02, PNorm = 85.6757, GNorm = 0.8068, lr_0 = 1.0939e-04
Loss = 7.7287e-02, PNorm = 85.6783, GNorm = 0.6253, lr_0 = 1.0931e-04
Loss = 7.5172e-02, PNorm = 85.6801, GNorm = 0.7377, lr_0 = 1.0924e-04
Loss = 7.8995e-02, PNorm = 85.6815, GNorm = 0.6068, lr_0 = 1.0916e-04
Loss = 8.0127e-02, PNorm = 85.6827, GNorm = 0.7577, lr_0 = 1.0909e-04
Loss = 8.7007e-02, PNorm = 85.6850, GNorm = 0.6306, lr_0 = 1.0901e-04
Loss = 7.3019e-02, PNorm = 85.6869, GNorm = 0.6892, lr_0 = 1.0894e-04
Loss = 7.6587e-02, PNorm = 85.6890, GNorm = 0.7243, lr_0 = 1.0886e-04
Loss = 7.8142e-02, PNorm = 85.6911, GNorm = 0.6040, lr_0 = 1.0879e-04
Loss = 6.5937e-02, PNorm = 85.6924, GNorm = 0.7271, lr_0 = 1.0871e-04
Loss = 7.6745e-02, PNorm = 85.6940, GNorm = 0.6467, lr_0 = 1.0864e-04
Loss = 7.3377e-02, PNorm = 85.6958, GNorm = 0.5955, lr_0 = 1.0856e-04
Validation mae = 0.228470
Epoch 29
Loss = 6.6190e-02, PNorm = 85.6982, GNorm = 0.6002, lr_0 = 1.0849e-04
Loss = 7.3065e-02, PNorm = 85.7014, GNorm = 0.8758, lr_0 = 1.0841e-04
Loss = 7.2786e-02, PNorm = 85.7053, GNorm = 0.5221, lr_0 = 1.0834e-04
Loss = 7.8434e-02, PNorm = 85.7055, GNorm = 0.5497, lr_0 = 1.0827e-04
Loss = 7.0346e-02, PNorm = 85.7065, GNorm = 0.5134, lr_0 = 1.0819e-04
Loss = 7.6877e-02, PNorm = 85.7079, GNorm = 0.6274, lr_0 = 1.0812e-04
Loss = 7.9283e-02, PNorm = 85.7073, GNorm = 0.6216, lr_0 = 1.0804e-04
Loss = 6.9944e-02, PNorm = 85.7083, GNorm = 0.7288, lr_0 = 1.0797e-04
Loss = 7.1285e-02, PNorm = 85.7097, GNorm = 0.7298, lr_0 = 1.0790e-04
Loss = 7.2764e-02, PNorm = 85.7106, GNorm = 0.6689, lr_0 = 1.0782e-04
Loss = 8.3836e-02, PNorm = 85.7129, GNorm = 0.8367, lr_0 = 1.0775e-04
Loss = 7.1150e-02, PNorm = 85.7158, GNorm = 0.7598, lr_0 = 1.0767e-04
Loss = 8.1243e-02, PNorm = 85.7158, GNorm = 0.4812, lr_0 = 1.0760e-04
Loss = 7.4338e-02, PNorm = 85.7173, GNorm = 0.6959, lr_0 = 1.0753e-04
Loss = 7.2573e-02, PNorm = 85.7199, GNorm = 0.6425, lr_0 = 1.0745e-04
Loss = 7.3013e-02, PNorm = 85.7213, GNorm = 0.6225, lr_0 = 1.0738e-04
Loss = 7.5859e-02, PNorm = 85.7221, GNorm = 0.6059, lr_0 = 1.0731e-04
Loss = 8.4945e-02, PNorm = 85.7224, GNorm = 0.5506, lr_0 = 1.0723e-04
Loss = 6.8452e-02, PNorm = 85.7230, GNorm = 0.4428, lr_0 = 1.0716e-04
Loss = 7.1980e-02, PNorm = 85.7250, GNorm = 0.7202, lr_0 = 1.0709e-04
Loss = 8.2984e-02, PNorm = 85.7266, GNorm = 0.7337, lr_0 = 1.0701e-04
Loss = 8.0754e-02, PNorm = 85.7293, GNorm = 0.5756, lr_0 = 1.0694e-04
Loss = 7.0409e-02, PNorm = 85.7313, GNorm = 0.5013, lr_0 = 1.0687e-04
Loss = 7.3353e-02, PNorm = 85.7315, GNorm = 0.6491, lr_0 = 1.0679e-04
Loss = 7.2645e-02, PNorm = 85.7331, GNorm = 0.7093, lr_0 = 1.0672e-04
Loss = 7.5579e-02, PNorm = 85.7344, GNorm = 0.6468, lr_0 = 1.0665e-04
Loss = 7.4769e-02, PNorm = 85.7341, GNorm = 0.6297, lr_0 = 1.0657e-04
Loss = 7.4388e-02, PNorm = 85.7347, GNorm = 0.8131, lr_0 = 1.0650e-04
Loss = 7.5892e-02, PNorm = 85.7360, GNorm = 0.6583, lr_0 = 1.0643e-04
Loss = 7.2712e-02, PNorm = 85.7390, GNorm = 0.6075, lr_0 = 1.0635e-04
Loss = 7.5750e-02, PNorm = 85.7408, GNorm = 0.6332, lr_0 = 1.0628e-04
Loss = 7.3958e-02, PNorm = 85.7409, GNorm = 0.6681, lr_0 = 1.0621e-04
Loss = 7.5812e-02, PNorm = 85.7422, GNorm = 0.5958, lr_0 = 1.0614e-04
Loss = 7.5723e-02, PNorm = 85.7437, GNorm = 0.5362, lr_0 = 1.0606e-04
Loss = 8.4549e-02, PNorm = 85.7442, GNorm = 0.6409, lr_0 = 1.0599e-04
Loss = 7.7773e-02, PNorm = 85.7450, GNorm = 0.5813, lr_0 = 1.0592e-04
Loss = 8.2392e-02, PNorm = 85.7466, GNorm = 0.4986, lr_0 = 1.0585e-04
Loss = 7.3955e-02, PNorm = 85.7482, GNorm = 0.5225, lr_0 = 1.0577e-04
Loss = 7.4886e-02, PNorm = 85.7502, GNorm = 0.8755, lr_0 = 1.0570e-04
Loss = 7.5829e-02, PNorm = 85.7508, GNorm = 0.9202, lr_0 = 1.0563e-04
Loss = 7.6390e-02, PNorm = 85.7504, GNorm = 0.6104, lr_0 = 1.0556e-04
Loss = 6.2816e-02, PNorm = 85.7514, GNorm = 0.5299, lr_0 = 1.0548e-04
Loss = 8.5126e-02, PNorm = 85.7525, GNorm = 0.5118, lr_0 = 1.0541e-04
Loss = 7.4005e-02, PNorm = 85.7538, GNorm = 0.6149, lr_0 = 1.0534e-04
Loss = 7.7054e-02, PNorm = 85.7557, GNorm = 0.5717, lr_0 = 1.0527e-04
Loss = 6.8029e-02, PNorm = 85.7578, GNorm = 0.5741, lr_0 = 1.0519e-04
Loss = 6.7090e-02, PNorm = 85.7586, GNorm = 0.7873, lr_0 = 1.0512e-04
Loss = 8.7473e-02, PNorm = 85.7591, GNorm = 0.6033, lr_0 = 1.0505e-04
Loss = 6.8200e-02, PNorm = 85.7604, GNorm = 0.6727, lr_0 = 1.0498e-04
Loss = 8.0280e-02, PNorm = 85.7627, GNorm = 0.5324, lr_0 = 1.0491e-04
Loss = 6.4172e-02, PNorm = 85.7648, GNorm = 0.4693, lr_0 = 1.0483e-04
Loss = 7.1900e-02, PNorm = 85.7657, GNorm = 0.5947, lr_0 = 1.0476e-04
Loss = 8.3145e-02, PNorm = 85.7660, GNorm = 0.5260, lr_0 = 1.0469e-04
Loss = 6.8781e-02, PNorm = 85.7677, GNorm = 0.5234, lr_0 = 1.0462e-04
Loss = 6.4251e-02, PNorm = 85.7688, GNorm = 0.4921, lr_0 = 1.0455e-04
Loss = 7.2990e-02, PNorm = 85.7714, GNorm = 0.5886, lr_0 = 1.0448e-04
Loss = 8.3520e-02, PNorm = 85.7735, GNorm = 0.6735, lr_0 = 1.0440e-04
Loss = 6.1803e-02, PNorm = 85.7761, GNorm = 0.5915, lr_0 = 1.0433e-04
Loss = 7.9712e-02, PNorm = 85.7765, GNorm = 0.5851, lr_0 = 1.0426e-04
Loss = 7.8843e-02, PNorm = 85.7769, GNorm = 0.3086, lr_0 = 1.0419e-04
Loss = 7.5815e-02, PNorm = 85.7781, GNorm = 0.6629, lr_0 = 1.0412e-04
Loss = 7.6405e-02, PNorm = 85.7783, GNorm = 0.8068, lr_0 = 1.0405e-04
Loss = 7.9172e-02, PNorm = 85.7775, GNorm = 0.7192, lr_0 = 1.0398e-04
Loss = 8.1133e-02, PNorm = 85.7781, GNorm = 0.9071, lr_0 = 1.0391e-04
Loss = 7.8575e-02, PNorm = 85.7807, GNorm = 0.6264, lr_0 = 1.0383e-04
Loss = 7.4643e-02, PNorm = 85.7830, GNorm = 0.9018, lr_0 = 1.0376e-04
Loss = 8.3999e-02, PNorm = 85.7847, GNorm = 0.5620, lr_0 = 1.0369e-04
Loss = 7.5455e-02, PNorm = 85.7853, GNorm = 0.6378, lr_0 = 1.0362e-04
Loss = 7.8836e-02, PNorm = 85.7871, GNorm = 0.8191, lr_0 = 1.0355e-04
Loss = 7.2494e-02, PNorm = 85.7888, GNorm = 0.6336, lr_0 = 1.0348e-04
Loss = 7.8866e-02, PNorm = 85.7895, GNorm = 0.4740, lr_0 = 1.0341e-04
Loss = 7.7512e-02, PNorm = 85.7911, GNorm = 0.9488, lr_0 = 1.0334e-04
Loss = 8.5395e-02, PNorm = 85.7940, GNorm = 0.6533, lr_0 = 1.0327e-04
Loss = 7.3990e-02, PNorm = 85.7955, GNorm = 0.6424, lr_0 = 1.0320e-04
Loss = 7.5946e-02, PNorm = 85.7971, GNorm = 0.7366, lr_0 = 1.0312e-04
Loss = 7.2954e-02, PNorm = 85.7985, GNorm = 0.6860, lr_0 = 1.0305e-04
Loss = 7.0251e-02, PNorm = 85.7997, GNorm = 0.6664, lr_0 = 1.0298e-04
Loss = 6.3855e-02, PNorm = 85.8017, GNorm = 0.5017, lr_0 = 1.0291e-04
Loss = 7.6512e-02, PNorm = 85.8021, GNorm = 0.5777, lr_0 = 1.0284e-04
Loss = 6.9474e-02, PNorm = 85.8035, GNorm = 0.5220, lr_0 = 1.0277e-04
Loss = 6.5657e-02, PNorm = 85.8056, GNorm = 0.6717, lr_0 = 1.0270e-04
Loss = 7.5522e-02, PNorm = 85.8073, GNorm = 0.6055, lr_0 = 1.0263e-04
Loss = 9.6276e-02, PNorm = 85.8095, GNorm = 0.7061, lr_0 = 1.0256e-04
Loss = 7.1459e-02, PNorm = 85.8111, GNorm = 0.4991, lr_0 = 1.0249e-04
Loss = 6.7584e-02, PNorm = 85.8130, GNorm = 0.6711, lr_0 = 1.0242e-04
Loss = 7.2013e-02, PNorm = 85.8151, GNorm = 0.7923, lr_0 = 1.0235e-04
Loss = 7.8991e-02, PNorm = 85.8169, GNorm = 0.7395, lr_0 = 1.0228e-04
Loss = 6.5578e-02, PNorm = 85.8163, GNorm = 0.4661, lr_0 = 1.0221e-04
Loss = 7.5454e-02, PNorm = 85.8183, GNorm = 0.7058, lr_0 = 1.0214e-04
Loss = 7.1671e-02, PNorm = 85.8179, GNorm = 0.5268, lr_0 = 1.0207e-04
Loss = 7.5563e-02, PNorm = 85.8183, GNorm = 0.6943, lr_0 = 1.0200e-04
Loss = 6.6628e-02, PNorm = 85.8190, GNorm = 0.4425, lr_0 = 1.0193e-04
Loss = 7.8461e-02, PNorm = 85.8202, GNorm = 0.6742, lr_0 = 1.0186e-04
Loss = 8.0363e-02, PNorm = 85.8211, GNorm = 0.6685, lr_0 = 1.0179e-04
Loss = 7.5103e-02, PNorm = 85.8231, GNorm = 0.5542, lr_0 = 1.0172e-04
Loss = 6.3617e-02, PNorm = 85.8240, GNorm = 0.7240, lr_0 = 1.0165e-04
Loss = 6.5107e-02, PNorm = 85.8256, GNorm = 0.4833, lr_0 = 1.0158e-04
Loss = 7.4004e-02, PNorm = 85.8266, GNorm = 0.4864, lr_0 = 1.0151e-04
Loss = 7.2959e-02, PNorm = 85.8278, GNorm = 0.8555, lr_0 = 1.0144e-04
Loss = 7.5814e-02, PNorm = 85.8300, GNorm = 0.7246, lr_0 = 1.0137e-04
Loss = 7.7499e-02, PNorm = 85.8304, GNorm = 0.7248, lr_0 = 1.0130e-04
Loss = 8.1653e-02, PNorm = 85.8310, GNorm = 0.6147, lr_0 = 1.0123e-04
Loss = 7.4062e-02, PNorm = 85.8335, GNorm = 0.6488, lr_0 = 1.0116e-04
Loss = 7.8281e-02, PNorm = 85.8361, GNorm = 0.7131, lr_0 = 1.0110e-04
Loss = 7.2572e-02, PNorm = 85.8367, GNorm = 0.5382, lr_0 = 1.0103e-04
Loss = 9.0406e-02, PNorm = 85.8380, GNorm = 0.8626, lr_0 = 1.0096e-04
Loss = 7.6455e-02, PNorm = 85.8404, GNorm = 0.5370, lr_0 = 1.0089e-04
Loss = 7.5852e-02, PNorm = 85.8430, GNorm = 0.4697, lr_0 = 1.0082e-04
Loss = 7.4536e-02, PNorm = 85.8437, GNorm = 0.6689, lr_0 = 1.0075e-04
Loss = 6.8334e-02, PNorm = 85.8449, GNorm = 0.6669, lr_0 = 1.0068e-04
Loss = 6.4602e-02, PNorm = 85.8468, GNorm = 0.5722, lr_0 = 1.0061e-04
Loss = 7.5540e-02, PNorm = 85.8480, GNorm = 0.7593, lr_0 = 1.0054e-04
Loss = 7.2211e-02, PNorm = 85.8499, GNorm = 0.7265, lr_0 = 1.0047e-04
Loss = 7.6991e-02, PNorm = 85.8501, GNorm = 0.6331, lr_0 = 1.0041e-04
Loss = 8.2953e-02, PNorm = 85.8515, GNorm = 0.5851, lr_0 = 1.0034e-04
Loss = 8.2482e-02, PNorm = 85.8544, GNorm = 0.7800, lr_0 = 1.0027e-04
Loss = 8.2367e-02, PNorm = 85.8559, GNorm = 0.7390, lr_0 = 1.0020e-04
Loss = 6.6380e-02, PNorm = 85.8574, GNorm = 0.5936, lr_0 = 1.0013e-04
Loss = 7.1058e-02, PNorm = 85.8598, GNorm = 0.5787, lr_0 = 1.0006e-04
Loss = 7.8299e-02, PNorm = 85.8623, GNorm = 0.6271, lr_0 = 1.0000e-04
Validation mae = 0.226309
Model 0 best validation mae = 0.225568 on epoch 27
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.224777
Ensemble test mae = 0.224777
Fold 5
Splitting data with seed 5
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 9.7385e-01, PNorm = 47.8608, GNorm = 2.1560, lr_0 = 1.0413e-04
Loss = 1.0254e+00, PNorm = 47.8619, GNorm = 2.5334, lr_0 = 1.0788e-04
Loss = 9.0242e-01, PNorm = 47.8635, GNorm = 3.4547, lr_0 = 1.1163e-04
Loss = 1.0108e+00, PNorm = 47.8661, GNorm = 2.6504, lr_0 = 1.1537e-04
Loss = 8.1540e-01, PNorm = 47.8692, GNorm = 5.7582, lr_0 = 1.1913e-04
Loss = 8.3561e-01, PNorm = 47.8729, GNorm = 3.6815, lr_0 = 1.2287e-04
Loss = 8.0957e-01, PNorm = 47.8782, GNorm = 5.8546, lr_0 = 1.2663e-04
Loss = 7.3032e-01, PNorm = 47.8844, GNorm = 12.0575, lr_0 = 1.3038e-04
Loss = 7.0867e-01, PNorm = 47.8904, GNorm = 9.5049, lr_0 = 1.3413e-04
Loss = 6.5984e-01, PNorm = 47.8952, GNorm = 10.7025, lr_0 = 1.3788e-04
Loss = 7.2053e-01, PNorm = 47.9002, GNorm = 5.5204, lr_0 = 1.4163e-04
Loss = 6.8958e-01, PNorm = 47.9054, GNorm = 6.3286, lr_0 = 1.4537e-04
Loss = 5.8939e-01, PNorm = 47.9111, GNorm = 11.7463, lr_0 = 1.4913e-04
Loss = 6.4985e-01, PNorm = 47.9167, GNorm = 13.3505, lr_0 = 1.5288e-04
Loss = 4.9470e-01, PNorm = 47.9226, GNorm = 5.2518, lr_0 = 1.5662e-04
Loss = 4.8812e-01, PNorm = 47.9272, GNorm = 2.8373, lr_0 = 1.6038e-04
Loss = 5.3652e-01, PNorm = 47.9312, GNorm = 14.7008, lr_0 = 1.6412e-04
Loss = 5.8427e-01, PNorm = 47.9371, GNorm = 8.7965, lr_0 = 1.6788e-04
Loss = 4.4354e-01, PNorm = 47.9433, GNorm = 4.4167, lr_0 = 1.7163e-04
Loss = 4.3259e-01, PNorm = 47.9498, GNorm = 5.3746, lr_0 = 1.7538e-04
Loss = 3.8630e-01, PNorm = 47.9562, GNorm = 3.6813, lr_0 = 1.7913e-04
Loss = 4.6008e-01, PNorm = 47.9617, GNorm = 4.0431, lr_0 = 1.8288e-04
Loss = 4.1573e-01, PNorm = 47.9678, GNorm = 2.8403, lr_0 = 1.8662e-04
Loss = 3.7739e-01, PNorm = 47.9739, GNorm = 14.8508, lr_0 = 1.9038e-04
Loss = 4.5065e-01, PNorm = 47.9781, GNorm = 14.6736, lr_0 = 1.9413e-04
Loss = 4.2155e-01, PNorm = 47.9839, GNorm = 8.0452, lr_0 = 1.9788e-04
Loss = 4.6303e-01, PNorm = 47.9899, GNorm = 21.3191, lr_0 = 2.0163e-04
Loss = 3.6904e-01, PNorm = 47.9959, GNorm = 1.7467, lr_0 = 2.0537e-04
Loss = 4.0503e-01, PNorm = 48.0009, GNorm = 4.1239, lr_0 = 2.0913e-04
Loss = 4.0719e-01, PNorm = 48.0075, GNorm = 9.1116, lr_0 = 2.1288e-04
Loss = 3.5584e-01, PNorm = 48.0132, GNorm = 1.4738, lr_0 = 2.1663e-04
Loss = 3.2952e-01, PNorm = 48.0180, GNorm = 7.4480, lr_0 = 2.2038e-04
Loss = 3.0370e-01, PNorm = 48.0226, GNorm = 4.2894, lr_0 = 2.2412e-04
Loss = 3.8142e-01, PNorm = 48.0278, GNorm = 2.8846, lr_0 = 2.2787e-04
Loss = 3.4709e-01, PNorm = 48.0326, GNorm = 10.7127, lr_0 = 2.3163e-04
Loss = 3.2278e-01, PNorm = 48.0377, GNorm = 8.2219, lr_0 = 2.3538e-04
Loss = 3.6408e-01, PNorm = 48.0435, GNorm = 13.8041, lr_0 = 2.3913e-04
Loss = 3.1999e-01, PNorm = 48.0476, GNorm = 11.0955, lr_0 = 2.4288e-04
Loss = 3.5018e-01, PNorm = 48.0533, GNorm = 5.6706, lr_0 = 2.4662e-04
Loss = 3.3333e-01, PNorm = 48.0598, GNorm = 2.2706, lr_0 = 2.5038e-04
Loss = 3.3418e-01, PNorm = 48.0646, GNorm = 2.1406, lr_0 = 2.5413e-04
Loss = 4.0009e-01, PNorm = 48.0703, GNorm = 5.5887, lr_0 = 2.5788e-04
Loss = 3.9750e-01, PNorm = 48.0783, GNorm = 5.8493, lr_0 = 2.6163e-04
Loss = 4.0104e-01, PNorm = 48.0850, GNorm = 3.6937, lr_0 = 2.6537e-04
Loss = 3.9302e-01, PNorm = 48.0928, GNorm = 2.6796, lr_0 = 2.6912e-04
Loss = 3.3162e-01, PNorm = 48.1001, GNorm = 10.3743, lr_0 = 2.7288e-04
Loss = 3.1097e-01, PNorm = 48.1048, GNorm = 2.1329, lr_0 = 2.7663e-04
Loss = 3.2058e-01, PNorm = 48.1107, GNorm = 10.3759, lr_0 = 2.8038e-04
Loss = 3.3558e-01, PNorm = 48.1169, GNorm = 1.6354, lr_0 = 2.8413e-04
Loss = 3.5657e-01, PNorm = 48.1228, GNorm = 16.2866, lr_0 = 2.8787e-04
Loss = 3.6094e-01, PNorm = 48.1272, GNorm = 12.4243, lr_0 = 2.9163e-04
Loss = 2.9976e-01, PNorm = 48.1339, GNorm = 7.2833, lr_0 = 2.9538e-04
Loss = 3.2234e-01, PNorm = 48.1405, GNorm = 9.4112, lr_0 = 2.9913e-04
Loss = 3.3463e-01, PNorm = 48.1457, GNorm = 1.3640, lr_0 = 3.0288e-04
Loss = 3.0864e-01, PNorm = 48.1536, GNorm = 1.5019, lr_0 = 3.0662e-04
Loss = 3.1626e-01, PNorm = 48.1610, GNorm = 5.6427, lr_0 = 3.1037e-04
Loss = 3.0540e-01, PNorm = 48.1658, GNorm = 3.5633, lr_0 = 3.1413e-04
Loss = 3.8024e-01, PNorm = 48.1753, GNorm = 8.7452, lr_0 = 3.1788e-04
Loss = 2.8950e-01, PNorm = 48.1792, GNorm = 2.9757, lr_0 = 3.2163e-04
Loss = 3.2365e-01, PNorm = 48.1852, GNorm = 1.8977, lr_0 = 3.2538e-04
Loss = 3.0949e-01, PNorm = 48.1932, GNorm = 4.0115, lr_0 = 3.2912e-04
Loss = 3.3149e-01, PNorm = 48.2008, GNorm = 2.1560, lr_0 = 3.3288e-04
Loss = 3.2372e-01, PNorm = 48.2067, GNorm = 6.5774, lr_0 = 3.3663e-04
Loss = 3.1183e-01, PNorm = 48.2144, GNorm = 2.1198, lr_0 = 3.4038e-04
Loss = 3.2506e-01, PNorm = 48.2217, GNorm = 5.1413, lr_0 = 3.4413e-04
Loss = 2.9818e-01, PNorm = 48.2298, GNorm = 3.3287, lr_0 = 3.4787e-04
Loss = 3.0117e-01, PNorm = 48.2393, GNorm = 7.1767, lr_0 = 3.5162e-04
Loss = 2.6681e-01, PNorm = 48.2440, GNorm = 4.1491, lr_0 = 3.5538e-04
Loss = 2.7916e-01, PNorm = 48.2520, GNorm = 2.6326, lr_0 = 3.5913e-04
Loss = 2.8583e-01, PNorm = 48.2594, GNorm = 5.3690, lr_0 = 3.6288e-04
Loss = 3.1416e-01, PNorm = 48.2675, GNorm = 4.7373, lr_0 = 3.6662e-04
Loss = 3.1465e-01, PNorm = 48.2776, GNorm = 15.7952, lr_0 = 3.7037e-04
Loss = 3.3425e-01, PNorm = 48.2839, GNorm = 3.5857, lr_0 = 3.7413e-04
Loss = 3.4223e-01, PNorm = 48.2948, GNorm = 14.7019, lr_0 = 3.7788e-04
Loss = 3.4270e-01, PNorm = 48.3036, GNorm = 1.4727, lr_0 = 3.8163e-04
Loss = 2.8262e-01, PNorm = 48.3130, GNorm = 5.2627, lr_0 = 3.8537e-04
Loss = 2.7762e-01, PNorm = 48.3221, GNorm = 2.8454, lr_0 = 3.8912e-04
Loss = 2.9404e-01, PNorm = 48.3259, GNorm = 4.8421, lr_0 = 3.9287e-04
Loss = 3.5825e-01, PNorm = 48.3316, GNorm = 5.2072, lr_0 = 3.9663e-04
Loss = 3.4786e-01, PNorm = 48.3404, GNorm = 1.8351, lr_0 = 4.0038e-04
Loss = 3.0886e-01, PNorm = 48.3486, GNorm = 1.5145, lr_0 = 4.0413e-04
Loss = 3.2615e-01, PNorm = 48.3578, GNorm = 2.0225, lr_0 = 4.0787e-04
Loss = 2.7518e-01, PNorm = 48.3651, GNorm = 1.3033, lr_0 = 4.1162e-04
Loss = 2.9915e-01, PNorm = 48.3716, GNorm = 2.6608, lr_0 = 4.1537e-04
Loss = 2.3994e-01, PNorm = 48.3809, GNorm = 9.0046, lr_0 = 4.1913e-04
Loss = 2.9496e-01, PNorm = 48.3894, GNorm = 5.2402, lr_0 = 4.2288e-04
Loss = 3.3176e-01, PNorm = 48.3961, GNorm = 2.0064, lr_0 = 4.2662e-04
Loss = 3.2892e-01, PNorm = 48.4062, GNorm = 7.7913, lr_0 = 4.3037e-04
Loss = 2.6541e-01, PNorm = 48.4186, GNorm = 2.3322, lr_0 = 4.3412e-04
Loss = 2.9368e-01, PNorm = 48.4277, GNorm = 3.7132, lr_0 = 4.3788e-04
Loss = 2.7597e-01, PNorm = 48.4360, GNorm = 4.9826, lr_0 = 4.4163e-04
Loss = 3.2246e-01, PNorm = 48.4423, GNorm = 12.1741, lr_0 = 4.4538e-04
Loss = 3.2686e-01, PNorm = 48.4512, GNorm = 5.1483, lr_0 = 4.4912e-04
Loss = 3.3581e-01, PNorm = 48.4616, GNorm = 8.3995, lr_0 = 4.5287e-04
Loss = 2.8592e-01, PNorm = 48.4731, GNorm = 3.7406, lr_0 = 4.5662e-04
Loss = 2.5755e-01, PNorm = 48.4834, GNorm = 2.8827, lr_0 = 4.6038e-04
Loss = 3.1223e-01, PNorm = 48.4922, GNorm = 3.3146, lr_0 = 4.6413e-04
Loss = 3.9052e-01, PNorm = 48.5027, GNorm = 6.3404, lr_0 = 4.6787e-04
Loss = 3.0073e-01, PNorm = 48.5172, GNorm = 2.7126, lr_0 = 4.7162e-04
Loss = 3.1689e-01, PNorm = 48.5289, GNorm = 4.2660, lr_0 = 4.7537e-04
Loss = 2.7466e-01, PNorm = 48.5400, GNorm = 3.2179, lr_0 = 4.7913e-04
Loss = 2.5338e-01, PNorm = 48.5489, GNorm = 4.7011, lr_0 = 4.8288e-04
Loss = 2.7265e-01, PNorm = 48.5537, GNorm = 1.6396, lr_0 = 4.8663e-04
Loss = 2.7679e-01, PNorm = 48.5574, GNorm = 1.5262, lr_0 = 4.9038e-04
Loss = 2.3643e-01, PNorm = 48.5655, GNorm = 6.2827, lr_0 = 4.9412e-04
Loss = 2.5060e-01, PNorm = 48.5710, GNorm = 6.4293, lr_0 = 4.9788e-04
Loss = 2.6540e-01, PNorm = 48.5760, GNorm = 3.4908, lr_0 = 5.0163e-04
Loss = 2.8374e-01, PNorm = 48.5888, GNorm = 2.6085, lr_0 = 5.0538e-04
Loss = 2.7154e-01, PNorm = 48.6015, GNorm = 1.1509, lr_0 = 5.0913e-04
Loss = 2.7850e-01, PNorm = 48.6163, GNorm = 8.0935, lr_0 = 5.1287e-04
Loss = 3.0829e-01, PNorm = 48.6262, GNorm = 0.9890, lr_0 = 5.1663e-04
Loss = 2.7523e-01, PNorm = 48.6375, GNorm = 1.1872, lr_0 = 5.2038e-04
Loss = 2.6790e-01, PNorm = 48.6502, GNorm = 7.0663, lr_0 = 5.2413e-04
Loss = 3.6041e-01, PNorm = 48.6541, GNorm = 9.6593, lr_0 = 5.2788e-04
Loss = 3.2244e-01, PNorm = 48.6661, GNorm = 6.3890, lr_0 = 5.3162e-04
Loss = 3.1772e-01, PNorm = 48.6814, GNorm = 4.7849, lr_0 = 5.3538e-04
Loss = 2.7215e-01, PNorm = 48.6903, GNorm = 2.5923, lr_0 = 5.3912e-04
Loss = 2.5175e-01, PNorm = 48.7006, GNorm = 2.5107, lr_0 = 5.4288e-04
Loss = 2.9109e-01, PNorm = 48.7094, GNorm = 1.0989, lr_0 = 5.4663e-04
Loss = 2.5931e-01, PNorm = 48.7198, GNorm = 5.4478, lr_0 = 5.5038e-04
Validation mae = 0.304706
Epoch 1
Loss = 2.2019e-01, PNorm = 48.7271, GNorm = 2.3540, lr_0 = 5.5413e-04
Loss = 2.6161e-01, PNorm = 48.7353, GNorm = 1.5556, lr_0 = 5.5787e-04
Loss = 2.7058e-01, PNorm = 48.7435, GNorm = 5.8943, lr_0 = 5.6163e-04
Loss = 2.5711e-01, PNorm = 48.7510, GNorm = 6.9705, lr_0 = 5.6538e-04
Loss = 2.7298e-01, PNorm = 48.7620, GNorm = 4.9624, lr_0 = 5.6913e-04
Loss = 3.0008e-01, PNorm = 48.7765, GNorm = 5.1459, lr_0 = 5.7288e-04
Loss = 2.7145e-01, PNorm = 48.7920, GNorm = 2.3770, lr_0 = 5.7662e-04
Loss = 2.9079e-01, PNorm = 48.8010, GNorm = 4.5179, lr_0 = 5.8038e-04
Loss = 2.6031e-01, PNorm = 48.8172, GNorm = 3.8832, lr_0 = 5.8413e-04
Loss = 2.6664e-01, PNorm = 48.8296, GNorm = 1.8479, lr_0 = 5.8788e-04
Loss = 2.5989e-01, PNorm = 48.8437, GNorm = 1.5247, lr_0 = 5.9163e-04
Loss = 2.7147e-01, PNorm = 48.8525, GNorm = 2.5723, lr_0 = 5.9538e-04
Loss = 2.4344e-01, PNorm = 48.8659, GNorm = 1.7880, lr_0 = 5.9913e-04
Loss = 2.5122e-01, PNorm = 48.8746, GNorm = 6.3750, lr_0 = 6.0288e-04
Loss = 2.7636e-01, PNorm = 48.8829, GNorm = 7.4077, lr_0 = 6.0663e-04
Loss = 3.0352e-01, PNorm = 48.8971, GNorm = 7.8051, lr_0 = 6.1038e-04
Loss = 3.6800e-01, PNorm = 48.9155, GNorm = 13.3240, lr_0 = 6.1413e-04
Loss = 3.4004e-01, PNorm = 48.9316, GNorm = 3.3951, lr_0 = 6.1788e-04
Loss = 3.2827e-01, PNorm = 48.9538, GNorm = 4.0896, lr_0 = 6.2163e-04
Loss = 2.8793e-01, PNorm = 48.9663, GNorm = 2.3476, lr_0 = 6.2538e-04
Loss = 2.5253e-01, PNorm = 48.9782, GNorm = 3.2891, lr_0 = 6.2913e-04
Loss = 2.6552e-01, PNorm = 48.9944, GNorm = 5.8818, lr_0 = 6.3288e-04
Loss = 2.7080e-01, PNorm = 49.0041, GNorm = 9.2359, lr_0 = 6.3663e-04
Loss = 2.9966e-01, PNorm = 49.0130, GNorm = 6.1059, lr_0 = 6.4038e-04
Loss = 3.3216e-01, PNorm = 49.0377, GNorm = 4.4807, lr_0 = 6.4413e-04
Loss = 3.2628e-01, PNorm = 49.0477, GNorm = 2.6623, lr_0 = 6.4788e-04
Loss = 2.7022e-01, PNorm = 49.0650, GNorm = 3.1425, lr_0 = 6.5163e-04
Loss = 2.9839e-01, PNorm = 49.0837, GNorm = 2.5732, lr_0 = 6.5538e-04
Loss = 2.5425e-01, PNorm = 49.0920, GNorm = 0.8947, lr_0 = 6.5913e-04
Loss = 2.7125e-01, PNorm = 49.1065, GNorm = 2.5404, lr_0 = 6.6288e-04
Loss = 3.0221e-01, PNorm = 49.1222, GNorm = 6.6352, lr_0 = 6.6663e-04
Loss = 2.6661e-01, PNorm = 49.1346, GNorm = 0.7923, lr_0 = 6.7038e-04
Loss = 2.4344e-01, PNorm = 49.1476, GNorm = 2.1798, lr_0 = 6.7413e-04
Loss = 2.8439e-01, PNorm = 49.1571, GNorm = 6.5181, lr_0 = 6.7788e-04
Loss = 2.7269e-01, PNorm = 49.1712, GNorm = 2.7890, lr_0 = 6.8163e-04
Loss = 2.8476e-01, PNorm = 49.1838, GNorm = 2.1443, lr_0 = 6.8538e-04
Loss = 2.7069e-01, PNorm = 49.2039, GNorm = 0.8426, lr_0 = 6.8913e-04
Loss = 3.1286e-01, PNorm = 49.2275, GNorm = 6.7691, lr_0 = 6.9288e-04
Loss = 3.5357e-01, PNorm = 49.2469, GNorm = 3.0397, lr_0 = 6.9663e-04
Loss = 3.1644e-01, PNorm = 49.2635, GNorm = 2.6785, lr_0 = 7.0038e-04
Loss = 3.0465e-01, PNorm = 49.2861, GNorm = 2.1011, lr_0 = 7.0413e-04
Loss = 2.5467e-01, PNorm = 49.3019, GNorm = 2.3711, lr_0 = 7.0788e-04
Loss = 2.5260e-01, PNorm = 49.3185, GNorm = 2.1874, lr_0 = 7.1163e-04
Loss = 2.6243e-01, PNorm = 49.3374, GNorm = 2.4680, lr_0 = 7.1538e-04
Loss = 2.8218e-01, PNorm = 49.3489, GNorm = 2.5202, lr_0 = 7.1913e-04
Loss = 3.1430e-01, PNorm = 49.3689, GNorm = 5.9678, lr_0 = 7.2288e-04
Loss = 2.6825e-01, PNorm = 49.3859, GNorm = 1.6668, lr_0 = 7.2663e-04
Loss = 2.6225e-01, PNorm = 49.4071, GNorm = 3.6912, lr_0 = 7.3038e-04
Loss = 2.7581e-01, PNorm = 49.4166, GNorm = 0.9196, lr_0 = 7.3413e-04
Loss = 2.4740e-01, PNorm = 49.4295, GNorm = 1.8702, lr_0 = 7.3788e-04
Loss = 2.0840e-01, PNorm = 49.4408, GNorm = 1.5065, lr_0 = 7.4163e-04
Loss = 2.5565e-01, PNorm = 49.4521, GNorm = 4.7449, lr_0 = 7.4538e-04
Loss = 2.4370e-01, PNorm = 49.4623, GNorm = 6.2641, lr_0 = 7.4913e-04
Loss = 2.7024e-01, PNorm = 49.4823, GNorm = 1.0062, lr_0 = 7.5288e-04
Loss = 2.7380e-01, PNorm = 49.4926, GNorm = 2.5157, lr_0 = 7.5663e-04
Loss = 2.8400e-01, PNorm = 49.5082, GNorm = 2.1537, lr_0 = 7.6038e-04
Loss = 2.6382e-01, PNorm = 49.5281, GNorm = 5.3112, lr_0 = 7.6413e-04
Loss = 2.8774e-01, PNorm = 49.5500, GNorm = 1.8253, lr_0 = 7.6788e-04
Loss = 2.5524e-01, PNorm = 49.5669, GNorm = 0.9503, lr_0 = 7.7163e-04
Loss = 2.5018e-01, PNorm = 49.5886, GNorm = 1.5547, lr_0 = 7.7538e-04
Loss = 2.7291e-01, PNorm = 49.6084, GNorm = 1.2341, lr_0 = 7.7913e-04
Loss = 2.9251e-01, PNorm = 49.6295, GNorm = 5.2375, lr_0 = 7.8288e-04
Loss = 2.6739e-01, PNorm = 49.6506, GNorm = 2.9303, lr_0 = 7.8663e-04
Loss = 2.9224e-01, PNorm = 49.6698, GNorm = 1.2769, lr_0 = 7.9038e-04
Loss = 2.4348e-01, PNorm = 49.6766, GNorm = 2.5529, lr_0 = 7.9413e-04
Loss = 2.2450e-01, PNorm = 49.6866, GNorm = 1.1889, lr_0 = 7.9788e-04
Loss = 2.8048e-01, PNorm = 49.6940, GNorm = 2.5266, lr_0 = 8.0163e-04
Loss = 2.9432e-01, PNorm = 49.7095, GNorm = 3.0321, lr_0 = 8.0538e-04
Loss = 2.6515e-01, PNorm = 49.7357, GNorm = 3.5898, lr_0 = 8.0913e-04
Loss = 2.4950e-01, PNorm = 49.7575, GNorm = 1.8648, lr_0 = 8.1288e-04
Loss = 2.3220e-01, PNorm = 49.7763, GNorm = 1.2552, lr_0 = 8.1663e-04
Loss = 2.5534e-01, PNorm = 49.7985, GNorm = 2.8639, lr_0 = 8.2038e-04
Loss = 2.2985e-01, PNorm = 49.8087, GNorm = 1.0480, lr_0 = 8.2413e-04
Loss = 2.8793e-01, PNorm = 49.8241, GNorm = 5.1207, lr_0 = 8.2788e-04
Loss = 2.5826e-01, PNorm = 49.8472, GNorm = 2.3163, lr_0 = 8.3163e-04
Loss = 2.4463e-01, PNorm = 49.8705, GNorm = 1.5457, lr_0 = 8.3538e-04
Loss = 2.6998e-01, PNorm = 49.8957, GNorm = 2.3187, lr_0 = 8.3913e-04
Loss = 2.5214e-01, PNorm = 49.9139, GNorm = 2.0097, lr_0 = 8.4288e-04
Loss = 2.7651e-01, PNorm = 49.9364, GNorm = 2.3645, lr_0 = 8.4663e-04
Loss = 2.5479e-01, PNorm = 49.9669, GNorm = 1.0839, lr_0 = 8.5038e-04
Loss = 2.9510e-01, PNorm = 49.9856, GNorm = 4.6023, lr_0 = 8.5413e-04
Loss = 2.5188e-01, PNorm = 50.0144, GNorm = 2.2579, lr_0 = 8.5788e-04
Loss = 2.5440e-01, PNorm = 50.0399, GNorm = 0.8066, lr_0 = 8.6163e-04
Loss = 2.5693e-01, PNorm = 50.0705, GNorm = 1.1822, lr_0 = 8.6538e-04
Loss = 2.2726e-01, PNorm = 50.0882, GNorm = 3.0499, lr_0 = 8.6913e-04
Loss = 2.6912e-01, PNorm = 50.1009, GNorm = 3.6105, lr_0 = 8.7288e-04
Loss = 2.8378e-01, PNorm = 50.1220, GNorm = 2.2624, lr_0 = 8.7663e-04
Loss = 3.3259e-01, PNorm = 50.1540, GNorm = 5.6641, lr_0 = 8.8038e-04
Loss = 3.1979e-01, PNorm = 50.1818, GNorm = 1.9506, lr_0 = 8.8413e-04
Loss = 2.7069e-01, PNorm = 50.2079, GNorm = 0.8551, lr_0 = 8.8788e-04
Loss = 2.6941e-01, PNorm = 50.2252, GNorm = 5.6787, lr_0 = 8.9163e-04
Loss = 2.0620e-01, PNorm = 50.2498, GNorm = 1.1741, lr_0 = 8.9538e-04
Loss = 2.4365e-01, PNorm = 50.2665, GNorm = 4.0809, lr_0 = 8.9913e-04
Loss = 2.6485e-01, PNorm = 50.2859, GNorm = 1.9124, lr_0 = 9.0288e-04
Loss = 2.4710e-01, PNorm = 50.2996, GNorm = 1.8434, lr_0 = 9.0663e-04
Loss = 2.3046e-01, PNorm = 50.3168, GNorm = 1.5117, lr_0 = 9.1038e-04
Loss = 2.7230e-01, PNorm = 50.3431, GNorm = 1.5490, lr_0 = 9.1413e-04
Loss = 2.3618e-01, PNorm = 50.3589, GNorm = 1.0369, lr_0 = 9.1788e-04
Loss = 2.2980e-01, PNorm = 50.3746, GNorm = 0.9994, lr_0 = 9.2163e-04
Loss = 2.7411e-01, PNorm = 50.3913, GNorm = 0.8703, lr_0 = 9.2538e-04
Loss = 2.6872e-01, PNorm = 50.4120, GNorm = 1.0926, lr_0 = 9.2913e-04
Loss = 2.3405e-01, PNorm = 50.4348, GNorm = 1.7220, lr_0 = 9.3288e-04
Loss = 2.4589e-01, PNorm = 50.4606, GNorm = 3.3461, lr_0 = 9.3663e-04
Loss = 2.3462e-01, PNorm = 50.4855, GNorm = 1.5537, lr_0 = 9.4038e-04
Loss = 2.6214e-01, PNorm = 50.5099, GNorm = 2.3593, lr_0 = 9.4413e-04
Loss = 2.3205e-01, PNorm = 50.5281, GNorm = 3.4268, lr_0 = 9.4788e-04
Loss = 2.7541e-01, PNorm = 50.5492, GNorm = 1.0201, lr_0 = 9.5163e-04
Loss = 2.3345e-01, PNorm = 50.5663, GNorm = 1.2501, lr_0 = 9.5538e-04
Loss = 2.6072e-01, PNorm = 50.5891, GNorm = 1.3402, lr_0 = 9.5913e-04
Loss = 2.4020e-01, PNorm = 50.6094, GNorm = 0.9033, lr_0 = 9.6288e-04
Loss = 2.6138e-01, PNorm = 50.6261, GNorm = 1.7776, lr_0 = 9.6663e-04
Loss = 2.3032e-01, PNorm = 50.6517, GNorm = 2.2436, lr_0 = 9.7038e-04
Loss = 2.6004e-01, PNorm = 50.6735, GNorm = 1.3947, lr_0 = 9.7413e-04
Loss = 2.2499e-01, PNorm = 50.6902, GNorm = 2.3821, lr_0 = 9.7788e-04
Loss = 2.1648e-01, PNorm = 50.7149, GNorm = 3.2177, lr_0 = 9.8163e-04
Loss = 2.3531e-01, PNorm = 50.7282, GNorm = 2.6549, lr_0 = 9.8537e-04
Loss = 2.5612e-01, PNorm = 50.7506, GNorm = 2.9006, lr_0 = 9.8912e-04
Loss = 2.5874e-01, PNorm = 50.7740, GNorm = 1.5612, lr_0 = 9.9288e-04
Loss = 2.4900e-01, PNorm = 50.7962, GNorm = 3.5268, lr_0 = 9.9663e-04
Loss = 2.2632e-01, PNorm = 50.8208, GNorm = 4.3268, lr_0 = 9.9993e-04
Validation mae = 0.330422
Epoch 2
Loss = 2.4210e-01, PNorm = 50.8430, GNorm = 1.9647, lr_0 = 9.9925e-04
Loss = 2.8208e-01, PNorm = 50.8648, GNorm = 1.8085, lr_0 = 9.9856e-04
Loss = 2.4853e-01, PNorm = 50.8972, GNorm = 1.5356, lr_0 = 9.9788e-04
Loss = 2.4364e-01, PNorm = 50.9240, GNorm = 1.6216, lr_0 = 9.9719e-04
Loss = 2.3944e-01, PNorm = 50.9462, GNorm = 1.4660, lr_0 = 9.9651e-04
Loss = 2.2853e-01, PNorm = 50.9662, GNorm = 1.2066, lr_0 = 9.9583e-04
Loss = 2.7449e-01, PNorm = 50.9860, GNorm = 3.9936, lr_0 = 9.9515e-04
Loss = 2.2296e-01, PNorm = 51.0079, GNorm = 1.2629, lr_0 = 9.9446e-04
Loss = 2.5028e-01, PNorm = 51.0368, GNorm = 0.9800, lr_0 = 9.9378e-04
Loss = 2.5395e-01, PNorm = 51.0677, GNorm = 1.7374, lr_0 = 9.9310e-04
Loss = 2.2503e-01, PNorm = 51.0955, GNorm = 2.2741, lr_0 = 9.9242e-04
Loss = 2.5438e-01, PNorm = 51.1200, GNorm = 0.7461, lr_0 = 9.9174e-04
Loss = 2.4228e-01, PNorm = 51.1469, GNorm = 2.8572, lr_0 = 9.9106e-04
Loss = 2.3962e-01, PNorm = 51.1716, GNorm = 1.4788, lr_0 = 9.9038e-04
Loss = 2.2256e-01, PNorm = 51.2024, GNorm = 3.2444, lr_0 = 9.8971e-04
Loss = 2.2721e-01, PNorm = 51.2232, GNorm = 1.6096, lr_0 = 9.8903e-04
Loss = 2.2499e-01, PNorm = 51.2351, GNorm = 1.0721, lr_0 = 9.8835e-04
Loss = 2.4600e-01, PNorm = 51.2636, GNorm = 0.7651, lr_0 = 9.8767e-04
Loss = 2.5315e-01, PNorm = 51.2865, GNorm = 1.3615, lr_0 = 9.8700e-04
Loss = 2.4115e-01, PNorm = 51.3063, GNorm = 2.5681, lr_0 = 9.8632e-04
Loss = 2.1170e-01, PNorm = 51.3228, GNorm = 1.3158, lr_0 = 9.8564e-04
Loss = 2.5375e-01, PNorm = 51.3459, GNorm = 0.8337, lr_0 = 9.8497e-04
Loss = 2.4773e-01, PNorm = 51.3675, GNorm = 2.3480, lr_0 = 9.8429e-04
Loss = 2.2773e-01, PNorm = 51.3925, GNorm = 2.6882, lr_0 = 9.8362e-04
Loss = 2.7633e-01, PNorm = 51.4090, GNorm = 2.6413, lr_0 = 9.8295e-04
Loss = 2.2667e-01, PNorm = 51.4408, GNorm = 1.5465, lr_0 = 9.8227e-04
Loss = 2.0786e-01, PNorm = 51.4640, GNorm = 2.5180, lr_0 = 9.8160e-04
Loss = 2.1995e-01, PNorm = 51.4947, GNorm = 1.9420, lr_0 = 9.8093e-04
Loss = 2.4314e-01, PNorm = 51.5139, GNorm = 1.6419, lr_0 = 9.8026e-04
Loss = 2.6507e-01, PNorm = 51.5330, GNorm = 2.7264, lr_0 = 9.7958e-04
Loss = 2.4457e-01, PNorm = 51.5652, GNorm = 3.8662, lr_0 = 9.7891e-04
Loss = 2.5405e-01, PNorm = 51.5845, GNorm = 2.7554, lr_0 = 9.7824e-04
Loss = 2.4060e-01, PNorm = 51.6136, GNorm = 0.8734, lr_0 = 9.7757e-04
Loss = 2.4442e-01, PNorm = 51.6423, GNorm = 1.1306, lr_0 = 9.7690e-04
Loss = 2.4700e-01, PNorm = 51.6547, GNorm = 0.9292, lr_0 = 9.7623e-04
Loss = 2.1115e-01, PNorm = 51.6835, GNorm = 1.4664, lr_0 = 9.7556e-04
Loss = 2.2404e-01, PNorm = 51.6952, GNorm = 0.9622, lr_0 = 9.7490e-04
Loss = 2.2217e-01, PNorm = 51.7132, GNorm = 0.9392, lr_0 = 9.7423e-04
Loss = 2.2757e-01, PNorm = 51.7340, GNorm = 5.1769, lr_0 = 9.7356e-04
Loss = 2.3923e-01, PNorm = 51.7581, GNorm = 3.2574, lr_0 = 9.7289e-04
Loss = 2.5648e-01, PNorm = 51.7854, GNorm = 3.6738, lr_0 = 9.7223e-04
Loss = 2.5286e-01, PNorm = 51.8247, GNorm = 2.0537, lr_0 = 9.7156e-04
Loss = 2.5352e-01, PNorm = 51.8561, GNorm = 2.0773, lr_0 = 9.7090e-04
Loss = 2.5049e-01, PNorm = 51.8863, GNorm = 3.5662, lr_0 = 9.7023e-04
Loss = 2.3230e-01, PNorm = 51.9128, GNorm = 2.0413, lr_0 = 9.6957e-04
Loss = 2.1033e-01, PNorm = 51.9373, GNorm = 0.7678, lr_0 = 9.6890e-04
Loss = 2.4312e-01, PNorm = 51.9696, GNorm = 1.2859, lr_0 = 9.6824e-04
Loss = 2.1528e-01, PNorm = 51.9947, GNorm = 2.5426, lr_0 = 9.6757e-04
Loss = 2.3756e-01, PNorm = 52.0233, GNorm = 1.1654, lr_0 = 9.6691e-04
Loss = 2.3322e-01, PNorm = 52.0279, GNorm = 1.3051, lr_0 = 9.6625e-04
Loss = 2.2545e-01, PNorm = 52.0545, GNorm = 1.9516, lr_0 = 9.6559e-04
Loss = 2.3536e-01, PNorm = 52.0745, GNorm = 1.1912, lr_0 = 9.6493e-04
Loss = 2.4763e-01, PNorm = 52.1003, GNorm = 3.6627, lr_0 = 9.6427e-04
Loss = 2.4320e-01, PNorm = 52.1375, GNorm = 0.6641, lr_0 = 9.6360e-04
Loss = 2.3445e-01, PNorm = 52.1598, GNorm = 1.1564, lr_0 = 9.6294e-04
Loss = 2.1536e-01, PNorm = 52.1807, GNorm = 1.7225, lr_0 = 9.6228e-04
Loss = 2.2723e-01, PNorm = 52.2065, GNorm = 1.2678, lr_0 = 9.6163e-04
Loss = 2.3615e-01, PNorm = 52.2310, GNorm = 1.6260, lr_0 = 9.6097e-04
Loss = 2.1392e-01, PNorm = 52.2522, GNorm = 1.3905, lr_0 = 9.6031e-04
Loss = 2.5691e-01, PNorm = 52.2695, GNorm = 0.8715, lr_0 = 9.5965e-04
Loss = 2.1953e-01, PNorm = 52.3022, GNorm = 1.1111, lr_0 = 9.5899e-04
Loss = 2.4392e-01, PNorm = 52.3216, GNorm = 3.1351, lr_0 = 9.5834e-04
Loss = 1.9990e-01, PNorm = 52.3452, GNorm = 1.9570, lr_0 = 9.5768e-04
Loss = 2.3393e-01, PNorm = 52.3633, GNorm = 1.3768, lr_0 = 9.5702e-04
Loss = 2.4097e-01, PNorm = 52.3800, GNorm = 0.8089, lr_0 = 9.5637e-04
Loss = 2.2256e-01, PNorm = 52.4154, GNorm = 1.0719, lr_0 = 9.5571e-04
Loss = 2.1024e-01, PNorm = 52.4392, GNorm = 0.7892, lr_0 = 9.5506e-04
Loss = 2.0342e-01, PNorm = 52.4574, GNorm = 1.4441, lr_0 = 9.5440e-04
Loss = 2.1682e-01, PNorm = 52.4761, GNorm = 1.7329, lr_0 = 9.5375e-04
Loss = 2.2718e-01, PNorm = 52.4924, GNorm = 0.7312, lr_0 = 9.5310e-04
Loss = 2.4712e-01, PNorm = 52.5138, GNorm = 2.5127, lr_0 = 9.5244e-04
Loss = 2.1354e-01, PNorm = 52.5282, GNorm = 0.8132, lr_0 = 9.5179e-04
Loss = 2.5132e-01, PNorm = 52.5540, GNorm = 1.5001, lr_0 = 9.5114e-04
Loss = 2.2760e-01, PNorm = 52.5897, GNorm = 0.9815, lr_0 = 9.5049e-04
Loss = 2.5049e-01, PNorm = 52.6240, GNorm = 1.7365, lr_0 = 9.4984e-04
Loss = 2.3959e-01, PNorm = 52.6527, GNorm = 2.7549, lr_0 = 9.4919e-04
Loss = 2.5715e-01, PNorm = 52.6770, GNorm = 3.6592, lr_0 = 9.4854e-04
Loss = 2.2327e-01, PNorm = 52.7023, GNorm = 2.9087, lr_0 = 9.4789e-04
Loss = 2.0368e-01, PNorm = 52.7275, GNorm = 0.7155, lr_0 = 9.4724e-04
Loss = 2.4074e-01, PNorm = 52.7577, GNorm = 1.9592, lr_0 = 9.4659e-04
Loss = 2.2963e-01, PNorm = 52.7898, GNorm = 2.8291, lr_0 = 9.4594e-04
Loss = 2.5076e-01, PNorm = 52.8130, GNorm = 1.0853, lr_0 = 9.4529e-04
Loss = 2.2010e-01, PNorm = 52.8340, GNorm = 1.3363, lr_0 = 9.4464e-04
Loss = 2.0980e-01, PNorm = 52.8632, GNorm = 2.2755, lr_0 = 9.4400e-04
Loss = 2.3189e-01, PNorm = 52.8870, GNorm = 3.4092, lr_0 = 9.4335e-04
Loss = 2.4297e-01, PNorm = 52.9196, GNorm = 0.7724, lr_0 = 9.4270e-04
Loss = 3.0928e-01, PNorm = 52.9562, GNorm = 1.9651, lr_0 = 9.4206e-04
Loss = 2.0515e-01, PNorm = 52.9906, GNorm = 0.9973, lr_0 = 9.4141e-04
Loss = 1.9853e-01, PNorm = 53.0020, GNorm = 0.7710, lr_0 = 9.4077e-04
Loss = 2.2115e-01, PNorm = 53.0265, GNorm = 1.5066, lr_0 = 9.4012e-04
Loss = 2.1522e-01, PNorm = 53.0466, GNorm = 1.2177, lr_0 = 9.3948e-04
Loss = 2.2035e-01, PNorm = 53.0673, GNorm = 3.8119, lr_0 = 9.3884e-04
Loss = 2.2569e-01, PNorm = 53.0909, GNorm = 1.2302, lr_0 = 9.3819e-04
Loss = 2.2508e-01, PNorm = 53.1281, GNorm = 0.8625, lr_0 = 9.3755e-04
Loss = 2.4598e-01, PNorm = 53.1540, GNorm = 2.2988, lr_0 = 9.3691e-04
Loss = 2.3530e-01, PNorm = 53.1892, GNorm = 0.8814, lr_0 = 9.3627e-04
Loss = 2.1670e-01, PNorm = 53.2114, GNorm = 2.8921, lr_0 = 9.3562e-04
Loss = 2.1359e-01, PNorm = 53.2483, GNorm = 1.7078, lr_0 = 9.3498e-04
Loss = 2.4220e-01, PNorm = 53.2700, GNorm = 3.2178, lr_0 = 9.3434e-04
Loss = 2.2908e-01, PNorm = 53.3054, GNorm = 0.8164, lr_0 = 9.3370e-04
Loss = 2.2008e-01, PNorm = 53.3295, GNorm = 0.9620, lr_0 = 9.3306e-04
Loss = 2.5039e-01, PNorm = 53.3524, GNorm = 0.8055, lr_0 = 9.3242e-04
Loss = 2.2002e-01, PNorm = 53.3779, GNorm = 1.6155, lr_0 = 9.3178e-04
Loss = 2.0348e-01, PNorm = 53.4012, GNorm = 1.4610, lr_0 = 9.3115e-04
Loss = 2.0668e-01, PNorm = 53.4273, GNorm = 0.8192, lr_0 = 9.3051e-04
Loss = 1.9903e-01, PNorm = 53.4351, GNorm = 1.6992, lr_0 = 9.2987e-04
Loss = 2.4373e-01, PNorm = 53.4545, GNorm = 1.7029, lr_0 = 9.2923e-04
Loss = 2.3882e-01, PNorm = 53.4736, GNorm = 2.2382, lr_0 = 9.2860e-04
Loss = 2.4364e-01, PNorm = 53.4932, GNorm = 1.2474, lr_0 = 9.2796e-04
Loss = 2.5184e-01, PNorm = 53.5236, GNorm = 1.8518, lr_0 = 9.2733e-04
Loss = 2.1679e-01, PNorm = 53.5594, GNorm = 0.9487, lr_0 = 9.2669e-04
Loss = 2.2712e-01, PNorm = 53.5792, GNorm = 1.8957, lr_0 = 9.2606e-04
Loss = 2.4354e-01, PNorm = 53.6092, GNorm = 0.8546, lr_0 = 9.2542e-04
Loss = 2.1112e-01, PNorm = 53.6248, GNorm = 2.4058, lr_0 = 9.2479e-04
Loss = 2.4066e-01, PNorm = 53.6487, GNorm = 1.8387, lr_0 = 9.2415e-04
Loss = 2.3158e-01, PNorm = 53.6749, GNorm = 1.3557, lr_0 = 9.2352e-04
Loss = 2.3517e-01, PNorm = 53.7010, GNorm = 1.4807, lr_0 = 9.2289e-04
Loss = 2.3016e-01, PNorm = 53.7266, GNorm = 3.0406, lr_0 = 9.2226e-04
Loss = 2.1133e-01, PNorm = 53.7493, GNorm = 0.7386, lr_0 = 9.2162e-04
Loss = 1.9931e-01, PNorm = 53.7811, GNorm = 3.4681, lr_0 = 9.2099e-04
Validation mae = 0.276087
Epoch 3
Loss = 1.9577e-01, PNorm = 53.7945, GNorm = 1.5280, lr_0 = 9.2036e-04
Loss = 2.1815e-01, PNorm = 53.8265, GNorm = 1.3745, lr_0 = 9.1973e-04
Loss = 2.0946e-01, PNorm = 53.8566, GNorm = 0.7592, lr_0 = 9.1910e-04
Loss = 2.0735e-01, PNorm = 53.8835, GNorm = 1.0607, lr_0 = 9.1847e-04
Loss = 2.3542e-01, PNorm = 53.9131, GNorm = 0.7246, lr_0 = 9.1784e-04
Loss = 2.4458e-01, PNorm = 53.9323, GNorm = 1.6735, lr_0 = 9.1721e-04
Loss = 2.3412e-01, PNorm = 53.9710, GNorm = 0.5882, lr_0 = 9.1658e-04
Loss = 2.2586e-01, PNorm = 54.0017, GNorm = 0.5512, lr_0 = 9.1596e-04
Loss = 1.8298e-01, PNorm = 54.0254, GNorm = 2.3092, lr_0 = 9.1533e-04
Loss = 2.2409e-01, PNorm = 54.0529, GNorm = 1.4046, lr_0 = 9.1470e-04
Loss = 2.1917e-01, PNorm = 54.0628, GNorm = 1.2777, lr_0 = 9.1408e-04
Loss = 1.9336e-01, PNorm = 54.0905, GNorm = 1.1420, lr_0 = 9.1345e-04
Loss = 2.1484e-01, PNorm = 54.1116, GNorm = 1.7966, lr_0 = 9.1282e-04
Loss = 2.0072e-01, PNorm = 54.1451, GNorm = 1.4795, lr_0 = 9.1220e-04
Loss = 2.1205e-01, PNorm = 54.1721, GNorm = 2.7350, lr_0 = 9.1157e-04
Loss = 2.3272e-01, PNorm = 54.1962, GNorm = 2.4810, lr_0 = 9.1095e-04
Loss = 2.1611e-01, PNorm = 54.2156, GNorm = 1.1300, lr_0 = 9.1032e-04
Loss = 1.7752e-01, PNorm = 54.2348, GNorm = 1.3934, lr_0 = 9.0970e-04
Loss = 1.7140e-01, PNorm = 54.2507, GNorm = 0.9084, lr_0 = 9.0908e-04
Loss = 2.0769e-01, PNorm = 54.2641, GNorm = 1.4394, lr_0 = 9.0846e-04
Loss = 2.1363e-01, PNorm = 54.2905, GNorm = 1.3348, lr_0 = 9.0783e-04
Loss = 1.8817e-01, PNorm = 54.3137, GNorm = 1.1201, lr_0 = 9.0721e-04
Loss = 2.0931e-01, PNorm = 54.3489, GNorm = 0.9884, lr_0 = 9.0659e-04
Loss = 2.0606e-01, PNorm = 54.3700, GNorm = 2.0255, lr_0 = 9.0597e-04
Loss = 2.3505e-01, PNorm = 54.3976, GNorm = 1.3061, lr_0 = 9.0535e-04
Loss = 2.3724e-01, PNorm = 54.4174, GNorm = 3.4081, lr_0 = 9.0473e-04
Loss = 2.0425e-01, PNorm = 54.4489, GNorm = 1.4616, lr_0 = 9.0411e-04
Loss = 2.0689e-01, PNorm = 54.4698, GNorm = 1.9287, lr_0 = 9.0349e-04
Loss = 2.2474e-01, PNorm = 54.4978, GNorm = 3.4117, lr_0 = 9.0287e-04
Loss = 2.4182e-01, PNorm = 54.5256, GNorm = 3.1383, lr_0 = 9.0225e-04
Loss = 2.2638e-01, PNorm = 54.5588, GNorm = 2.2210, lr_0 = 9.0163e-04
Loss = 2.3028e-01, PNorm = 54.5932, GNorm = 0.8639, lr_0 = 9.0102e-04
Loss = 2.3550e-01, PNorm = 54.6282, GNorm = 1.4721, lr_0 = 9.0040e-04
Loss = 2.0616e-01, PNorm = 54.6620, GNorm = 1.0190, lr_0 = 8.9978e-04
Loss = 2.1334e-01, PNorm = 54.6735, GNorm = 1.0145, lr_0 = 8.9916e-04
Loss = 2.3149e-01, PNorm = 54.7003, GNorm = 1.5738, lr_0 = 8.9855e-04
Loss = 2.0548e-01, PNorm = 54.7141, GNorm = 1.0870, lr_0 = 8.9793e-04
Loss = 2.0804e-01, PNorm = 54.7359, GNorm = 2.4362, lr_0 = 8.9732e-04
Loss = 2.1258e-01, PNorm = 54.7518, GNorm = 1.1836, lr_0 = 8.9670e-04
Loss = 2.3006e-01, PNorm = 54.7859, GNorm = 1.9686, lr_0 = 8.9609e-04
Loss = 2.2483e-01, PNorm = 54.8084, GNorm = 1.3393, lr_0 = 8.9548e-04
Loss = 2.1777e-01, PNorm = 54.8448, GNorm = 3.2358, lr_0 = 8.9486e-04
Loss = 1.7499e-01, PNorm = 54.8766, GNorm = 1.2159, lr_0 = 8.9425e-04
Loss = 2.0635e-01, PNorm = 54.9107, GNorm = 2.2476, lr_0 = 8.9364e-04
Loss = 2.2288e-01, PNorm = 54.9381, GNorm = 1.3587, lr_0 = 8.9302e-04
Loss = 2.1630e-01, PNorm = 54.9633, GNorm = 1.8786, lr_0 = 8.9241e-04
Loss = 2.2386e-01, PNorm = 54.9894, GNorm = 0.7713, lr_0 = 8.9180e-04
Loss = 1.9567e-01, PNorm = 55.0120, GNorm = 0.9093, lr_0 = 8.9119e-04
Loss = 2.1403e-01, PNorm = 55.0378, GNorm = 1.2122, lr_0 = 8.9058e-04
Loss = 2.0469e-01, PNorm = 55.0694, GNorm = 1.6273, lr_0 = 8.8997e-04
Loss = 2.1969e-01, PNorm = 55.0873, GNorm = 0.8690, lr_0 = 8.8936e-04
Loss = 1.9840e-01, PNorm = 55.1072, GNorm = 1.2068, lr_0 = 8.8875e-04
Loss = 2.1006e-01, PNorm = 55.1366, GNorm = 2.6039, lr_0 = 8.8814e-04
Loss = 2.2334e-01, PNorm = 55.1632, GNorm = 1.7488, lr_0 = 8.8753e-04
Loss = 2.1017e-01, PNorm = 55.1923, GNorm = 1.2974, lr_0 = 8.8693e-04
Loss = 2.0660e-01, PNorm = 55.2183, GNorm = 1.8761, lr_0 = 8.8632e-04
Loss = 1.9730e-01, PNorm = 55.2497, GNorm = 2.2124, lr_0 = 8.8571e-04
Loss = 1.9908e-01, PNorm = 55.2677, GNorm = 1.1327, lr_0 = 8.8510e-04
Loss = 1.7264e-01, PNorm = 55.2878, GNorm = 0.6834, lr_0 = 8.8450e-04
Loss = 2.2368e-01, PNorm = 55.3130, GNorm = 1.0422, lr_0 = 8.8389e-04
Loss = 1.9864e-01, PNorm = 55.3360, GNorm = 1.9617, lr_0 = 8.8329e-04
Loss = 2.3899e-01, PNorm = 55.3646, GNorm = 1.2894, lr_0 = 8.8268e-04
Loss = 1.9718e-01, PNorm = 55.3877, GNorm = 0.7280, lr_0 = 8.8208e-04
Loss = 2.1013e-01, PNorm = 55.4081, GNorm = 2.3139, lr_0 = 8.8147e-04
Loss = 2.0612e-01, PNorm = 55.4251, GNorm = 1.1876, lr_0 = 8.8087e-04
Loss = 2.3254e-01, PNorm = 55.4512, GNorm = 1.3658, lr_0 = 8.8026e-04
Loss = 1.8182e-01, PNorm = 55.4783, GNorm = 0.7815, lr_0 = 8.7966e-04
Loss = 1.9612e-01, PNorm = 55.5107, GNorm = 1.0778, lr_0 = 8.7906e-04
Loss = 1.9284e-01, PNorm = 55.5345, GNorm = 0.7930, lr_0 = 8.7846e-04
Loss = 2.0006e-01, PNorm = 55.5579, GNorm = 1.2454, lr_0 = 8.7785e-04
Loss = 2.0831e-01, PNorm = 55.5837, GNorm = 0.5608, lr_0 = 8.7725e-04
Loss = 2.2488e-01, PNorm = 55.6067, GNorm = 0.5475, lr_0 = 8.7665e-04
Loss = 1.9359e-01, PNorm = 55.6320, GNorm = 0.6185, lr_0 = 8.7605e-04
Loss = 1.8170e-01, PNorm = 55.6525, GNorm = 1.0016, lr_0 = 8.7545e-04
Loss = 1.9553e-01, PNorm = 55.6702, GNorm = 1.2591, lr_0 = 8.7485e-04
Loss = 1.9856e-01, PNorm = 55.6958, GNorm = 0.9140, lr_0 = 8.7425e-04
Loss = 2.2514e-01, PNorm = 55.7234, GNorm = 1.8889, lr_0 = 8.7365e-04
Loss = 1.8903e-01, PNorm = 55.7536, GNorm = 1.0022, lr_0 = 8.7306e-04
Loss = 1.7397e-01, PNorm = 55.7815, GNorm = 1.3169, lr_0 = 8.7246e-04
Loss = 1.9342e-01, PNorm = 55.8152, GNorm = 0.9520, lr_0 = 8.7186e-04
Loss = 1.9399e-01, PNorm = 55.8336, GNorm = 0.7641, lr_0 = 8.7126e-04
Loss = 2.0504e-01, PNorm = 55.8494, GNorm = 0.5857, lr_0 = 8.7067e-04
Loss = 1.8877e-01, PNorm = 55.8729, GNorm = 1.4510, lr_0 = 8.7007e-04
Loss = 2.3709e-01, PNorm = 55.9006, GNorm = 0.8701, lr_0 = 8.6947e-04
Loss = 2.1888e-01, PNorm = 55.9388, GNorm = 0.9489, lr_0 = 8.6888e-04
Loss = 2.2793e-01, PNorm = 55.9620, GNorm = 1.9523, lr_0 = 8.6828e-04
Loss = 2.4682e-01, PNorm = 55.9809, GNorm = 1.1203, lr_0 = 8.6769e-04
Loss = 1.6772e-01, PNorm = 56.0089, GNorm = 1.8223, lr_0 = 8.6709e-04
Loss = 2.1063e-01, PNorm = 56.0332, GNorm = 0.5687, lr_0 = 8.6650e-04
Loss = 1.9618e-01, PNorm = 56.0550, GNorm = 1.2905, lr_0 = 8.6590e-04
Loss = 2.0704e-01, PNorm = 56.0866, GNorm = 2.1946, lr_0 = 8.6531e-04
Loss = 1.9058e-01, PNorm = 56.1162, GNorm = 1.7402, lr_0 = 8.6472e-04
Loss = 1.8823e-01, PNorm = 56.1390, GNorm = 0.8546, lr_0 = 8.6413e-04
Loss = 2.1678e-01, PNorm = 56.1616, GNorm = 1.5590, lr_0 = 8.6353e-04
Loss = 1.9855e-01, PNorm = 56.1841, GNorm = 0.9149, lr_0 = 8.6294e-04
Loss = 2.0822e-01, PNorm = 56.2108, GNorm = 0.8175, lr_0 = 8.6235e-04
Loss = 2.8667e-01, PNorm = 56.2411, GNorm = 2.5504, lr_0 = 8.6176e-04
Loss = 2.8002e-01, PNorm = 56.2869, GNorm = 1.8557, lr_0 = 8.6117e-04
Loss = 2.0843e-01, PNorm = 56.3246, GNorm = 1.0439, lr_0 = 8.6058e-04
Loss = 2.1260e-01, PNorm = 56.3626, GNorm = 1.0044, lr_0 = 8.5999e-04
Loss = 1.8248e-01, PNorm = 56.3827, GNorm = 1.6194, lr_0 = 8.5940e-04
Loss = 2.0330e-01, PNorm = 56.3958, GNorm = 0.6539, lr_0 = 8.5881e-04
Loss = 2.0619e-01, PNorm = 56.4158, GNorm = 2.8949, lr_0 = 8.5823e-04
Loss = 2.1838e-01, PNorm = 56.4468, GNorm = 0.8196, lr_0 = 8.5764e-04
Loss = 2.0993e-01, PNorm = 56.4707, GNorm = 1.0200, lr_0 = 8.5705e-04
Loss = 1.8053e-01, PNorm = 56.4928, GNorm = 1.1132, lr_0 = 8.5646e-04
Loss = 1.8838e-01, PNorm = 56.5134, GNorm = 0.5983, lr_0 = 8.5588e-04
Loss = 2.0869e-01, PNorm = 56.5355, GNorm = 0.6906, lr_0 = 8.5529e-04
Loss = 2.0175e-01, PNorm = 56.5586, GNorm = 1.1902, lr_0 = 8.5470e-04
Loss = 1.8175e-01, PNorm = 56.5815, GNorm = 1.5380, lr_0 = 8.5412e-04
Loss = 1.8969e-01, PNorm = 56.6041, GNorm = 0.8612, lr_0 = 8.5353e-04
Loss = 1.9562e-01, PNorm = 56.6293, GNorm = 1.1770, lr_0 = 8.5295e-04
Loss = 2.0633e-01, PNorm = 56.6459, GNorm = 1.8775, lr_0 = 8.5236e-04
Loss = 2.2815e-01, PNorm = 56.6731, GNorm = 1.3728, lr_0 = 8.5178e-04
Loss = 1.8635e-01, PNorm = 56.6872, GNorm = 0.7547, lr_0 = 8.5120e-04
Loss = 1.9581e-01, PNorm = 56.7171, GNorm = 1.6638, lr_0 = 8.5061e-04
Loss = 1.9837e-01, PNorm = 56.7461, GNorm = 0.8667, lr_0 = 8.5003e-04
Loss = 1.9399e-01, PNorm = 56.7697, GNorm = 1.0321, lr_0 = 8.4945e-04
Loss = 1.9105e-01, PNorm = 56.7930, GNorm = 1.6735, lr_0 = 8.4887e-04
Loss = 1.8040e-01, PNorm = 56.8074, GNorm = 0.8103, lr_0 = 8.4828e-04
Validation mae = 0.268513
Epoch 4
Loss = 1.8085e-01, PNorm = 56.8234, GNorm = 1.4287, lr_0 = 8.4770e-04
Loss = 2.0717e-01, PNorm = 56.8426, GNorm = 0.8349, lr_0 = 8.4712e-04
Loss = 2.0876e-01, PNorm = 56.8639, GNorm = 1.4477, lr_0 = 8.4654e-04
Loss = 1.6251e-01, PNorm = 56.8999, GNorm = 1.8115, lr_0 = 8.4596e-04
Loss = 1.7968e-01, PNorm = 56.9322, GNorm = 1.7111, lr_0 = 8.4538e-04
Loss = 1.6833e-01, PNorm = 56.9578, GNorm = 0.7969, lr_0 = 8.4480e-04
Loss = 1.6663e-01, PNorm = 56.9777, GNorm = 1.5479, lr_0 = 8.4423e-04
Loss = 2.0591e-01, PNorm = 56.9996, GNorm = 1.8289, lr_0 = 8.4365e-04
Loss = 2.2370e-01, PNorm = 57.0241, GNorm = 2.5828, lr_0 = 8.4307e-04
Loss = 2.1877e-01, PNorm = 57.0585, GNorm = 1.2540, lr_0 = 8.4249e-04
Loss = 2.0113e-01, PNorm = 57.0858, GNorm = 0.8540, lr_0 = 8.4191e-04
Loss = 1.7179e-01, PNorm = 57.1127, GNorm = 0.9910, lr_0 = 8.4134e-04
Loss = 1.9662e-01, PNorm = 57.1383, GNorm = 0.9525, lr_0 = 8.4076e-04
Loss = 1.8847e-01, PNorm = 57.1595, GNorm = 0.8454, lr_0 = 8.4019e-04
Loss = 1.8745e-01, PNorm = 57.1860, GNorm = 1.0546, lr_0 = 8.3961e-04
Loss = 2.0789e-01, PNorm = 57.2154, GNorm = 1.0108, lr_0 = 8.3903e-04
Loss = 1.6246e-01, PNorm = 57.2346, GNorm = 0.6282, lr_0 = 8.3846e-04
Loss = 1.8613e-01, PNorm = 57.2658, GNorm = 2.9886, lr_0 = 8.3789e-04
Loss = 1.9591e-01, PNorm = 57.2934, GNorm = 0.9274, lr_0 = 8.3731e-04
Loss = 1.7679e-01, PNorm = 57.3255, GNorm = 1.2767, lr_0 = 8.3674e-04
Loss = 1.8331e-01, PNorm = 57.3512, GNorm = 0.7971, lr_0 = 8.3616e-04
Loss = 1.9790e-01, PNorm = 57.3666, GNorm = 1.1996, lr_0 = 8.3559e-04
Loss = 1.9763e-01, PNorm = 57.3843, GNorm = 1.3482, lr_0 = 8.3502e-04
Loss = 1.9548e-01, PNorm = 57.4168, GNorm = 0.6524, lr_0 = 8.3445e-04
Loss = 1.8787e-01, PNorm = 57.4369, GNorm = 1.1020, lr_0 = 8.3388e-04
Loss = 2.1837e-01, PNorm = 57.4750, GNorm = 1.1217, lr_0 = 8.3330e-04
Loss = 2.0281e-01, PNorm = 57.4962, GNorm = 1.4081, lr_0 = 8.3273e-04
Loss = 1.7896e-01, PNorm = 57.5219, GNorm = 1.2055, lr_0 = 8.3216e-04
Loss = 1.9413e-01, PNorm = 57.5449, GNorm = 1.0626, lr_0 = 8.3159e-04
Loss = 1.9922e-01, PNorm = 57.5631, GNorm = 0.8822, lr_0 = 8.3102e-04
Loss = 2.0796e-01, PNorm = 57.5813, GNorm = 0.7313, lr_0 = 8.3045e-04
Loss = 1.9953e-01, PNorm = 57.6093, GNorm = 2.0940, lr_0 = 8.2988e-04
Loss = 2.0739e-01, PNorm = 57.6437, GNorm = 3.1334, lr_0 = 8.2932e-04
Loss = 1.9870e-01, PNorm = 57.6845, GNorm = 1.7812, lr_0 = 8.2875e-04
Loss = 2.2827e-01, PNorm = 57.7193, GNorm = 1.7325, lr_0 = 8.2818e-04
Loss = 1.8647e-01, PNorm = 57.7473, GNorm = 0.9101, lr_0 = 8.2761e-04
Loss = 1.7944e-01, PNorm = 57.7671, GNorm = 0.7646, lr_0 = 8.2705e-04
Loss = 1.9402e-01, PNorm = 57.7936, GNorm = 0.9072, lr_0 = 8.2648e-04
Loss = 1.9470e-01, PNorm = 57.8125, GNorm = 1.2247, lr_0 = 8.2591e-04
Loss = 1.6898e-01, PNorm = 57.8331, GNorm = 1.2810, lr_0 = 8.2535e-04
Loss = 1.8588e-01, PNorm = 57.8553, GNorm = 1.0682, lr_0 = 8.2478e-04
Loss = 1.8960e-01, PNorm = 57.8753, GNorm = 1.5434, lr_0 = 8.2422e-04
Loss = 2.0085e-01, PNorm = 57.8981, GNorm = 1.0404, lr_0 = 8.2365e-04
Loss = 1.7110e-01, PNorm = 57.9188, GNorm = 0.7760, lr_0 = 8.2309e-04
Loss = 1.9590e-01, PNorm = 57.9463, GNorm = 1.2309, lr_0 = 8.2252e-04
Loss = 1.7450e-01, PNorm = 57.9674, GNorm = 1.5960, lr_0 = 8.2196e-04
Loss = 1.7323e-01, PNorm = 57.9913, GNorm = 0.7888, lr_0 = 8.2140e-04
Loss = 1.9207e-01, PNorm = 58.0194, GNorm = 0.7222, lr_0 = 8.2084e-04
Loss = 1.8923e-01, PNorm = 58.0391, GNorm = 0.6970, lr_0 = 8.2027e-04
Loss = 1.5170e-01, PNorm = 58.0504, GNorm = 0.7423, lr_0 = 8.1971e-04
Loss = 1.7507e-01, PNorm = 58.0720, GNorm = 1.1550, lr_0 = 8.1915e-04
Loss = 1.9608e-01, PNorm = 58.0977, GNorm = 1.3749, lr_0 = 8.1859e-04
Loss = 2.0703e-01, PNorm = 58.1224, GNorm = 0.9519, lr_0 = 8.1803e-04
Loss = 1.7474e-01, PNorm = 58.1504, GNorm = 1.0717, lr_0 = 8.1747e-04
Loss = 2.0855e-01, PNorm = 58.1688, GNorm = 2.3248, lr_0 = 8.1691e-04
Loss = 1.7959e-01, PNorm = 58.1929, GNorm = 0.7536, lr_0 = 8.1635e-04
Loss = 1.9054e-01, PNorm = 58.2179, GNorm = 1.5818, lr_0 = 8.1579e-04
Loss = 1.8434e-01, PNorm = 58.2451, GNorm = 0.6152, lr_0 = 8.1523e-04
Loss = 1.9548e-01, PNorm = 58.2766, GNorm = 1.3211, lr_0 = 8.1467e-04
Loss = 2.0424e-01, PNorm = 58.2917, GNorm = 1.6991, lr_0 = 8.1411e-04
Loss = 2.0909e-01, PNorm = 58.3181, GNorm = 0.7502, lr_0 = 8.1355e-04
Loss = 2.0469e-01, PNorm = 58.3470, GNorm = 0.7642, lr_0 = 8.1300e-04
Loss = 1.8830e-01, PNorm = 58.3688, GNorm = 0.9402, lr_0 = 8.1244e-04
Loss = 2.0494e-01, PNorm = 58.3925, GNorm = 0.9706, lr_0 = 8.1188e-04
Loss = 1.8824e-01, PNorm = 58.4227, GNorm = 1.2952, lr_0 = 8.1133e-04
Loss = 1.9405e-01, PNorm = 58.4535, GNorm = 1.7310, lr_0 = 8.1077e-04
Loss = 1.8697e-01, PNorm = 58.4774, GNorm = 1.2241, lr_0 = 8.1022e-04
Loss = 1.8125e-01, PNorm = 58.4957, GNorm = 0.7328, lr_0 = 8.0966e-04
Loss = 1.6956e-01, PNorm = 58.5177, GNorm = 0.8713, lr_0 = 8.0911e-04
Loss = 1.9551e-01, PNorm = 58.5429, GNorm = 0.8037, lr_0 = 8.0855e-04
Loss = 1.6027e-01, PNorm = 58.5605, GNorm = 0.5374, lr_0 = 8.0800e-04
Loss = 1.7305e-01, PNorm = 58.5853, GNorm = 1.3140, lr_0 = 8.0745e-04
Loss = 1.7981e-01, PNorm = 58.6033, GNorm = 0.6202, lr_0 = 8.0689e-04
Loss = 1.9374e-01, PNorm = 58.6273, GNorm = 1.2111, lr_0 = 8.0634e-04
Loss = 2.0310e-01, PNorm = 58.6495, GNorm = 1.1045, lr_0 = 8.0579e-04
Loss = 1.8457e-01, PNorm = 58.6704, GNorm = 1.2361, lr_0 = 8.0523e-04
Loss = 1.8213e-01, PNorm = 58.7006, GNorm = 0.7061, lr_0 = 8.0468e-04
Loss = 2.1018e-01, PNorm = 58.7221, GNorm = 2.4714, lr_0 = 8.0413e-04
Loss = 2.0372e-01, PNorm = 58.7474, GNorm = 1.1060, lr_0 = 8.0358e-04
Loss = 1.9381e-01, PNorm = 58.7786, GNorm = 0.5447, lr_0 = 8.0303e-04
Loss = 1.8155e-01, PNorm = 58.8018, GNorm = 0.6897, lr_0 = 8.0248e-04
Loss = 1.8976e-01, PNorm = 58.8225, GNorm = 1.0327, lr_0 = 8.0193e-04
Loss = 1.9754e-01, PNorm = 58.8504, GNorm = 1.1794, lr_0 = 8.0138e-04
Loss = 2.0278e-01, PNorm = 58.8691, GNorm = 0.8404, lr_0 = 8.0083e-04
Loss = 1.7249e-01, PNorm = 58.8983, GNorm = 0.9925, lr_0 = 8.0028e-04
Loss = 1.8709e-01, PNorm = 58.9187, GNorm = 0.9252, lr_0 = 7.9974e-04
Loss = 1.7991e-01, PNorm = 58.9419, GNorm = 0.8603, lr_0 = 7.9919e-04
Loss = 1.7256e-01, PNorm = 58.9624, GNorm = 1.0230, lr_0 = 7.9864e-04
Loss = 1.9635e-01, PNorm = 58.9831, GNorm = 1.4059, lr_0 = 7.9809e-04
Loss = 2.1526e-01, PNorm = 59.0110, GNorm = 1.2071, lr_0 = 7.9755e-04
Loss = 1.7954e-01, PNorm = 59.0285, GNorm = 1.0669, lr_0 = 7.9700e-04
Loss = 2.0303e-01, PNorm = 59.0508, GNorm = 1.6267, lr_0 = 7.9645e-04
Loss = 1.8294e-01, PNorm = 59.0749, GNorm = 1.4788, lr_0 = 7.9591e-04
Loss = 1.9928e-01, PNorm = 59.1016, GNorm = 0.7224, lr_0 = 7.9536e-04
Loss = 1.7908e-01, PNorm = 59.1286, GNorm = 0.7874, lr_0 = 7.9482e-04
Loss = 1.7765e-01, PNorm = 59.1453, GNorm = 1.9891, lr_0 = 7.9427e-04
Loss = 1.7667e-01, PNorm = 59.1773, GNorm = 1.1935, lr_0 = 7.9373e-04
Loss = 1.7718e-01, PNorm = 59.1985, GNorm = 1.0694, lr_0 = 7.9319e-04
Loss = 2.0527e-01, PNorm = 59.2304, GNorm = 1.2405, lr_0 = 7.9264e-04
Loss = 1.7379e-01, PNorm = 59.2500, GNorm = 0.8159, lr_0 = 7.9210e-04
Loss = 1.8413e-01, PNorm = 59.2775, GNorm = 0.8914, lr_0 = 7.9156e-04
Loss = 1.7930e-01, PNorm = 59.2968, GNorm = 1.4130, lr_0 = 7.9101e-04
Loss = 1.8486e-01, PNorm = 59.3147, GNorm = 0.9592, lr_0 = 7.9047e-04
Loss = 1.6444e-01, PNorm = 59.3348, GNorm = 0.8366, lr_0 = 7.8993e-04
Loss = 1.6495e-01, PNorm = 59.3597, GNorm = 1.0767, lr_0 = 7.8939e-04
Loss = 1.8929e-01, PNorm = 59.3772, GNorm = 1.0615, lr_0 = 7.8885e-04
Loss = 1.6661e-01, PNorm = 59.3990, GNorm = 0.8349, lr_0 = 7.8831e-04
Loss = 1.4918e-01, PNorm = 59.4235, GNorm = 1.2485, lr_0 = 7.8777e-04
Loss = 2.0585e-01, PNorm = 59.4444, GNorm = 0.7440, lr_0 = 7.8723e-04
Loss = 1.8958e-01, PNorm = 59.4821, GNorm = 0.9420, lr_0 = 7.8669e-04
Loss = 2.0685e-01, PNorm = 59.5113, GNorm = 1.1688, lr_0 = 7.8615e-04
Loss = 1.8861e-01, PNorm = 59.5438, GNorm = 0.4758, lr_0 = 7.8561e-04
Loss = 1.9432e-01, PNorm = 59.5653, GNorm = 1.9492, lr_0 = 7.8507e-04
Loss = 1.8165e-01, PNorm = 59.5828, GNorm = 0.8949, lr_0 = 7.8454e-04
Loss = 2.2577e-01, PNorm = 59.6045, GNorm = 3.1596, lr_0 = 7.8400e-04
Loss = 2.0686e-01, PNorm = 59.6357, GNorm = 1.2761, lr_0 = 7.8346e-04
Loss = 1.9077e-01, PNorm = 59.6586, GNorm = 0.8029, lr_0 = 7.8293e-04
Loss = 2.0755e-01, PNorm = 59.6827, GNorm = 1.0777, lr_0 = 7.8239e-04
Loss = 1.8188e-01, PNorm = 59.7023, GNorm = 0.7220, lr_0 = 7.8185e-04
Loss = 2.0025e-01, PNorm = 59.7278, GNorm = 0.6477, lr_0 = 7.8132e-04
Validation mae = 0.260019
Epoch 5
Loss = 1.7426e-01, PNorm = 59.7446, GNorm = 0.5685, lr_0 = 7.8078e-04
Loss = 1.6853e-01, PNorm = 59.7719, GNorm = 0.7775, lr_0 = 7.8025e-04
Loss = 1.7878e-01, PNorm = 59.7958, GNorm = 1.1524, lr_0 = 7.7971e-04
Loss = 1.7605e-01, PNorm = 59.8200, GNorm = 1.1758, lr_0 = 7.7918e-04
Loss = 1.7622e-01, PNorm = 59.8385, GNorm = 1.6092, lr_0 = 7.7864e-04
Loss = 1.5750e-01, PNorm = 59.8670, GNorm = 0.9046, lr_0 = 7.7811e-04
Loss = 1.7211e-01, PNorm = 59.8861, GNorm = 0.9594, lr_0 = 7.7758e-04
Loss = 1.8256e-01, PNorm = 59.9002, GNorm = 0.7758, lr_0 = 7.7705e-04
Loss = 1.6948e-01, PNorm = 59.9186, GNorm = 0.9298, lr_0 = 7.7651e-04
Loss = 1.6665e-01, PNorm = 59.9417, GNorm = 2.3718, lr_0 = 7.7598e-04
Loss = 1.7746e-01, PNorm = 59.9749, GNorm = 1.0338, lr_0 = 7.7545e-04
Loss = 2.0146e-01, PNorm = 60.0032, GNorm = 1.0941, lr_0 = 7.7492e-04
Loss = 1.9818e-01, PNorm = 60.0387, GNorm = 1.1615, lr_0 = 7.7439e-04
Loss = 1.6758e-01, PNorm = 60.0600, GNorm = 0.6499, lr_0 = 7.7386e-04
Loss = 1.7426e-01, PNorm = 60.0778, GNorm = 0.6949, lr_0 = 7.7333e-04
Loss = 1.6342e-01, PNorm = 60.1059, GNorm = 1.1402, lr_0 = 7.7280e-04
Loss = 1.7354e-01, PNorm = 60.1317, GNorm = 0.8396, lr_0 = 7.7227e-04
Loss = 1.4944e-01, PNorm = 60.1560, GNorm = 0.6269, lr_0 = 7.7174e-04
Loss = 1.6102e-01, PNorm = 60.1752, GNorm = 0.7209, lr_0 = 7.7121e-04
Loss = 2.0274e-01, PNorm = 60.1979, GNorm = 1.8267, lr_0 = 7.7068e-04
Loss = 1.8811e-01, PNorm = 60.2295, GNorm = 1.2623, lr_0 = 7.7015e-04
Loss = 1.5800e-01, PNorm = 60.2528, GNorm = 0.7066, lr_0 = 7.6963e-04
Loss = 1.6936e-01, PNorm = 60.2762, GNorm = 1.4774, lr_0 = 7.6910e-04
Loss = 1.7481e-01, PNorm = 60.2979, GNorm = 1.4890, lr_0 = 7.6857e-04
Loss = 1.7079e-01, PNorm = 60.3248, GNorm = 0.8697, lr_0 = 7.6805e-04
Loss = 1.8615e-01, PNorm = 60.3461, GNorm = 1.0559, lr_0 = 7.6752e-04
Loss = 1.7672e-01, PNorm = 60.3727, GNorm = 0.6206, lr_0 = 7.6699e-04
Loss = 1.9254e-01, PNorm = 60.3935, GNorm = 0.9477, lr_0 = 7.6647e-04
Loss = 1.8393e-01, PNorm = 60.4221, GNorm = 1.2979, lr_0 = 7.6594e-04
Loss = 1.8911e-01, PNorm = 60.4385, GNorm = 0.8661, lr_0 = 7.6542e-04
Loss = 1.9432e-01, PNorm = 60.4698, GNorm = 1.2895, lr_0 = 7.6489e-04
Loss = 1.6527e-01, PNorm = 60.4919, GNorm = 0.6103, lr_0 = 7.6437e-04
Loss = 2.0156e-01, PNorm = 60.5238, GNorm = 2.4237, lr_0 = 7.6385e-04
Loss = 1.9339e-01, PNorm = 60.5530, GNorm = 0.8402, lr_0 = 7.6332e-04
Loss = 1.6581e-01, PNorm = 60.5751, GNorm = 1.5318, lr_0 = 7.6280e-04
Loss = 1.8519e-01, PNorm = 60.6015, GNorm = 1.5815, lr_0 = 7.6228e-04
Loss = 2.0716e-01, PNorm = 60.6305, GNorm = 1.1533, lr_0 = 7.6176e-04
Loss = 1.7711e-01, PNorm = 60.6610, GNorm = 1.0788, lr_0 = 7.6123e-04
Loss = 1.7250e-01, PNorm = 60.6804, GNorm = 0.8071, lr_0 = 7.6071e-04
Loss = 1.7913e-01, PNorm = 60.6986, GNorm = 1.0342, lr_0 = 7.6019e-04
Loss = 1.8172e-01, PNorm = 60.7188, GNorm = 0.6661, lr_0 = 7.5967e-04
Loss = 1.7263e-01, PNorm = 60.7386, GNorm = 0.8394, lr_0 = 7.5915e-04
Loss = 1.7415e-01, PNorm = 60.7673, GNorm = 1.2916, lr_0 = 7.5863e-04
Loss = 1.6959e-01, PNorm = 60.7921, GNorm = 1.3325, lr_0 = 7.5811e-04
Loss = 1.7635e-01, PNorm = 60.8107, GNorm = 0.9450, lr_0 = 7.5759e-04
Loss = 1.8026e-01, PNorm = 60.8347, GNorm = 0.9387, lr_0 = 7.5707e-04
Loss = 1.7609e-01, PNorm = 60.8624, GNorm = 0.9301, lr_0 = 7.5655e-04
Loss = 1.9597e-01, PNorm = 60.8908, GNorm = 0.8818, lr_0 = 7.5603e-04
Loss = 1.8619e-01, PNorm = 60.9212, GNorm = 0.9675, lr_0 = 7.5552e-04
Loss = 1.7618e-01, PNorm = 60.9371, GNorm = 0.7065, lr_0 = 7.5500e-04
Loss = 1.7887e-01, PNorm = 60.9682, GNorm = 0.7354, lr_0 = 7.5448e-04
Loss = 1.6571e-01, PNorm = 60.9868, GNorm = 1.7717, lr_0 = 7.5397e-04
Loss = 1.7317e-01, PNorm = 61.0108, GNorm = 0.8408, lr_0 = 7.5345e-04
Loss = 1.7950e-01, PNorm = 61.0335, GNorm = 1.0483, lr_0 = 7.5293e-04
Loss = 1.6397e-01, PNorm = 61.0522, GNorm = 0.9127, lr_0 = 7.5242e-04
Loss = 1.7688e-01, PNorm = 61.0790, GNorm = 0.9588, lr_0 = 7.5190e-04
Loss = 1.7266e-01, PNorm = 61.1094, GNorm = 0.8338, lr_0 = 7.5139e-04
Loss = 1.7782e-01, PNorm = 61.1322, GNorm = 0.4567, lr_0 = 7.5087e-04
Loss = 1.7427e-01, PNorm = 61.1553, GNorm = 1.5067, lr_0 = 7.5036e-04
Loss = 1.5837e-01, PNorm = 61.1811, GNorm = 0.6392, lr_0 = 7.4984e-04
Loss = 1.6895e-01, PNorm = 61.2003, GNorm = 1.2814, lr_0 = 7.4933e-04
Loss = 1.8476e-01, PNorm = 61.2268, GNorm = 1.5982, lr_0 = 7.4882e-04
Loss = 2.2167e-01, PNorm = 61.2508, GNorm = 0.8498, lr_0 = 7.4830e-04
Loss = 1.9382e-01, PNorm = 61.2897, GNorm = 1.3388, lr_0 = 7.4779e-04
Loss = 1.8024e-01, PNorm = 61.3160, GNorm = 1.1318, lr_0 = 7.4728e-04
Loss = 1.5536e-01, PNorm = 61.3397, GNorm = 0.8068, lr_0 = 7.4677e-04
Loss = 1.8084e-01, PNorm = 61.3602, GNorm = 1.2082, lr_0 = 7.4625e-04
Loss = 1.7836e-01, PNorm = 61.3780, GNorm = 0.9935, lr_0 = 7.4574e-04
Loss = 1.7664e-01, PNorm = 61.4065, GNorm = 1.2730, lr_0 = 7.4523e-04
Loss = 1.6234e-01, PNorm = 61.4315, GNorm = 0.5989, lr_0 = 7.4472e-04
Loss = 1.6914e-01, PNorm = 61.4528, GNorm = 1.5983, lr_0 = 7.4421e-04
Loss = 1.6653e-01, PNorm = 61.4694, GNorm = 0.8850, lr_0 = 7.4370e-04
Loss = 1.5937e-01, PNorm = 61.4856, GNorm = 0.8092, lr_0 = 7.4319e-04
Loss = 1.6745e-01, PNorm = 61.5130, GNorm = 1.4611, lr_0 = 7.4268e-04
Loss = 1.8088e-01, PNorm = 61.5432, GNorm = 1.2332, lr_0 = 7.4217e-04
Loss = 1.6826e-01, PNorm = 61.5698, GNorm = 0.9032, lr_0 = 7.4167e-04
Loss = 1.7682e-01, PNorm = 61.5869, GNorm = 0.6993, lr_0 = 7.4116e-04
Loss = 1.8294e-01, PNorm = 61.6115, GNorm = 1.0502, lr_0 = 7.4065e-04
Loss = 1.6605e-01, PNorm = 61.6273, GNorm = 1.1141, lr_0 = 7.4014e-04
Loss = 1.8490e-01, PNorm = 61.6551, GNorm = 1.2735, lr_0 = 7.3964e-04
Loss = 1.7996e-01, PNorm = 61.6798, GNorm = 0.6506, lr_0 = 7.3913e-04
Loss = 1.5942e-01, PNorm = 61.7063, GNorm = 0.6952, lr_0 = 7.3862e-04
Loss = 1.6577e-01, PNorm = 61.7330, GNorm = 0.9905, lr_0 = 7.3812e-04
Loss = 1.6563e-01, PNorm = 61.7577, GNorm = 0.9419, lr_0 = 7.3761e-04
Loss = 1.8083e-01, PNorm = 61.7848, GNorm = 0.6688, lr_0 = 7.3711e-04
Loss = 1.6231e-01, PNorm = 61.8024, GNorm = 1.0874, lr_0 = 7.3660e-04
Loss = 1.8694e-01, PNorm = 61.8250, GNorm = 0.6067, lr_0 = 7.3610e-04
Loss = 1.5996e-01, PNorm = 61.8500, GNorm = 1.1552, lr_0 = 7.3559e-04
Loss = 1.7423e-01, PNorm = 61.8685, GNorm = 1.0479, lr_0 = 7.3509e-04
Loss = 1.8217e-01, PNorm = 61.8886, GNorm = 0.8739, lr_0 = 7.3458e-04
Loss = 1.9449e-01, PNorm = 61.9071, GNorm = 0.8965, lr_0 = 7.3408e-04
Loss = 2.1418e-01, PNorm = 61.9329, GNorm = 1.3490, lr_0 = 7.3358e-04
Loss = 1.6927e-01, PNorm = 61.9551, GNorm = 0.7189, lr_0 = 7.3308e-04
Loss = 2.0909e-01, PNorm = 61.9826, GNorm = 1.1794, lr_0 = 7.3257e-04
Loss = 2.2428e-01, PNorm = 62.0177, GNorm = 1.4179, lr_0 = 7.3207e-04
Loss = 1.8957e-01, PNorm = 62.0395, GNorm = 1.8776, lr_0 = 7.3157e-04
Loss = 1.8277e-01, PNorm = 62.0630, GNorm = 1.0168, lr_0 = 7.3107e-04
Loss = 1.6141e-01, PNorm = 62.0891, GNorm = 0.9020, lr_0 = 7.3057e-04
Loss = 1.7182e-01, PNorm = 62.1191, GNorm = 0.6562, lr_0 = 7.3007e-04
Loss = 2.0031e-01, PNorm = 62.1481, GNorm = 1.2559, lr_0 = 7.2957e-04
Loss = 2.0969e-01, PNorm = 62.1777, GNorm = 1.3805, lr_0 = 7.2907e-04
Loss = 1.6496e-01, PNorm = 62.1965, GNorm = 0.7394, lr_0 = 7.2857e-04
Loss = 1.7841e-01, PNorm = 62.2147, GNorm = 0.7329, lr_0 = 7.2807e-04
Loss = 1.6635e-01, PNorm = 62.2361, GNorm = 0.7934, lr_0 = 7.2757e-04
Loss = 1.6144e-01, PNorm = 62.2533, GNorm = 0.9755, lr_0 = 7.2707e-04
Loss = 1.8531e-01, PNorm = 62.2805, GNorm = 1.5983, lr_0 = 7.2657e-04
Loss = 1.8254e-01, PNorm = 62.3002, GNorm = 1.2020, lr_0 = 7.2608e-04
Loss = 1.8476e-01, PNorm = 62.3210, GNorm = 0.6424, lr_0 = 7.2558e-04
Loss = 1.5326e-01, PNorm = 62.3435, GNorm = 0.7300, lr_0 = 7.2508e-04
Loss = 1.5523e-01, PNorm = 62.3636, GNorm = 1.1401, lr_0 = 7.2458e-04
Loss = 1.9435e-01, PNorm = 62.3835, GNorm = 0.8627, lr_0 = 7.2409e-04
Loss = 1.7246e-01, PNorm = 62.4003, GNorm = 0.5425, lr_0 = 7.2359e-04
Loss = 1.8639e-01, PNorm = 62.4255, GNorm = 0.8671, lr_0 = 7.2310e-04
Loss = 1.7536e-01, PNorm = 62.4424, GNorm = 1.0664, lr_0 = 7.2260e-04
Loss = 1.5918e-01, PNorm = 62.4649, GNorm = 0.6778, lr_0 = 7.2211e-04
Loss = 1.8328e-01, PNorm = 62.4914, GNorm = 0.7902, lr_0 = 7.2161e-04
Loss = 1.7472e-01, PNorm = 62.5116, GNorm = 0.8863, lr_0 = 7.2112e-04
Loss = 1.6437e-01, PNorm = 62.5311, GNorm = 1.0903, lr_0 = 7.2062e-04
Loss = 1.8070e-01, PNorm = 62.5512, GNorm = 0.9485, lr_0 = 7.2013e-04
Loss = 1.8651e-01, PNorm = 62.5706, GNorm = 0.8339, lr_0 = 7.1964e-04
Validation mae = 0.253141
Epoch 6
Loss = 1.8941e-01, PNorm = 62.5996, GNorm = 1.2007, lr_0 = 7.1914e-04
Loss = 1.6026e-01, PNorm = 62.6218, GNorm = 0.5825, lr_0 = 7.1865e-04
Loss = 1.6155e-01, PNorm = 62.6395, GNorm = 0.9030, lr_0 = 7.1816e-04
Loss = 1.4304e-01, PNorm = 62.6634, GNorm = 0.5904, lr_0 = 7.1767e-04
Loss = 1.5670e-01, PNorm = 62.6833, GNorm = 0.6841, lr_0 = 7.1717e-04
Loss = 1.7007e-01, PNorm = 62.7071, GNorm = 1.0819, lr_0 = 7.1668e-04
Loss = 1.6879e-01, PNorm = 62.7370, GNorm = 0.7719, lr_0 = 7.1619e-04
Loss = 1.5621e-01, PNorm = 62.7645, GNorm = 1.1128, lr_0 = 7.1570e-04
Loss = 1.5694e-01, PNorm = 62.7961, GNorm = 0.6399, lr_0 = 7.1521e-04
Loss = 1.6366e-01, PNorm = 62.8117, GNorm = 0.7170, lr_0 = 7.1472e-04
Loss = 1.6244e-01, PNorm = 62.8278, GNorm = 0.9889, lr_0 = 7.1423e-04
Loss = 1.5719e-01, PNorm = 62.8496, GNorm = 0.6617, lr_0 = 7.1374e-04
Loss = 1.7705e-01, PNorm = 62.8674, GNorm = 1.3199, lr_0 = 7.1325e-04
Loss = 1.5990e-01, PNorm = 62.8990, GNorm = 0.7068, lr_0 = 7.1277e-04
Loss = 1.8526e-01, PNorm = 62.9204, GNorm = 1.2959, lr_0 = 7.1228e-04
Loss = 1.5940e-01, PNorm = 62.9516, GNorm = 1.1403, lr_0 = 7.1179e-04
Loss = 1.4953e-01, PNorm = 62.9687, GNorm = 0.6148, lr_0 = 7.1130e-04
Loss = 1.5882e-01, PNorm = 62.9940, GNorm = 1.0813, lr_0 = 7.1081e-04
Loss = 1.6889e-01, PNorm = 63.0121, GNorm = 0.5848, lr_0 = 7.1033e-04
Loss = 1.5704e-01, PNorm = 63.0320, GNorm = 0.8272, lr_0 = 7.0984e-04
Loss = 1.7189e-01, PNorm = 63.0596, GNorm = 0.5925, lr_0 = 7.0935e-04
Loss = 1.7046e-01, PNorm = 63.0846, GNorm = 0.7386, lr_0 = 7.0887e-04
Loss = 1.4923e-01, PNorm = 63.1079, GNorm = 0.7391, lr_0 = 7.0838e-04
Loss = 2.0067e-01, PNorm = 63.1301, GNorm = 0.8756, lr_0 = 7.0790e-04
Loss = 1.7446e-01, PNorm = 63.1531, GNorm = 0.6604, lr_0 = 7.0741e-04
Loss = 1.5435e-01, PNorm = 63.1737, GNorm = 1.1090, lr_0 = 7.0693e-04
Loss = 1.7424e-01, PNorm = 63.2010, GNorm = 0.7132, lr_0 = 7.0644e-04
Loss = 1.5901e-01, PNorm = 63.2198, GNorm = 0.7196, lr_0 = 7.0596e-04
Loss = 1.7354e-01, PNorm = 63.2434, GNorm = 0.5062, lr_0 = 7.0548e-04
Loss = 1.7306e-01, PNorm = 63.2629, GNorm = 2.2810, lr_0 = 7.0499e-04
Loss = 1.7800e-01, PNorm = 63.2856, GNorm = 1.3895, lr_0 = 7.0451e-04
Loss = 1.9684e-01, PNorm = 63.3147, GNorm = 1.0891, lr_0 = 7.0403e-04
Loss = 2.0049e-01, PNorm = 63.3386, GNorm = 0.9280, lr_0 = 7.0354e-04
Loss = 2.0049e-01, PNorm = 63.3703, GNorm = 0.8141, lr_0 = 7.0306e-04
Loss = 2.0062e-01, PNorm = 63.3908, GNorm = 0.8633, lr_0 = 7.0258e-04
Loss = 1.6324e-01, PNorm = 63.4162, GNorm = 1.7200, lr_0 = 7.0210e-04
Loss = 1.5311e-01, PNorm = 63.4309, GNorm = 0.9087, lr_0 = 7.0162e-04
Loss = 1.5992e-01, PNorm = 63.4528, GNorm = 1.1070, lr_0 = 7.0114e-04
Loss = 1.8153e-01, PNorm = 63.4693, GNorm = 1.2536, lr_0 = 7.0066e-04
Loss = 1.7449e-01, PNorm = 63.4937, GNorm = 0.8163, lr_0 = 7.0018e-04
Loss = 1.7169e-01, PNorm = 63.5184, GNorm = 0.7886, lr_0 = 6.9970e-04
Loss = 1.6648e-01, PNorm = 63.5317, GNorm = 0.5425, lr_0 = 6.9922e-04
Loss = 1.5982e-01, PNorm = 63.5487, GNorm = 1.5575, lr_0 = 6.9874e-04
Loss = 1.7133e-01, PNorm = 63.5766, GNorm = 1.3262, lr_0 = 6.9826e-04
Loss = 1.7498e-01, PNorm = 63.6015, GNorm = 1.1379, lr_0 = 6.9778e-04
Loss = 1.9722e-01, PNorm = 63.6303, GNorm = 1.2061, lr_0 = 6.9730e-04
Loss = 1.5127e-01, PNorm = 63.6539, GNorm = 1.0648, lr_0 = 6.9683e-04
Loss = 1.8343e-01, PNorm = 63.6811, GNorm = 1.5483, lr_0 = 6.9635e-04
Loss = 1.6844e-01, PNorm = 63.7145, GNorm = 0.6155, lr_0 = 6.9587e-04
Loss = 1.7110e-01, PNorm = 63.7439, GNorm = 1.4060, lr_0 = 6.9540e-04
Loss = 1.7009e-01, PNorm = 63.7676, GNorm = 1.2365, lr_0 = 6.9492e-04
Loss = 1.6320e-01, PNorm = 63.7940, GNorm = 1.0199, lr_0 = 6.9444e-04
Loss = 1.4382e-01, PNorm = 63.8134, GNorm = 0.7262, lr_0 = 6.9397e-04
Loss = 1.7026e-01, PNorm = 63.8365, GNorm = 0.6174, lr_0 = 6.9349e-04
Loss = 1.6130e-01, PNorm = 63.8605, GNorm = 0.6150, lr_0 = 6.9302e-04
Loss = 1.8381e-01, PNorm = 63.8851, GNorm = 1.1813, lr_0 = 6.9254e-04
Loss = 1.6599e-01, PNorm = 63.9099, GNorm = 1.0457, lr_0 = 6.9207e-04
Loss = 1.7036e-01, PNorm = 63.9271, GNorm = 0.9824, lr_0 = 6.9159e-04
Loss = 1.6126e-01, PNorm = 63.9516, GNorm = 0.6418, lr_0 = 6.9112e-04
Loss = 1.6882e-01, PNorm = 63.9691, GNorm = 0.7657, lr_0 = 6.9065e-04
Loss = 1.5414e-01, PNorm = 63.9836, GNorm = 0.7552, lr_0 = 6.9017e-04
Loss = 1.7697e-01, PNorm = 64.0027, GNorm = 0.6781, lr_0 = 6.8970e-04
Loss = 1.4600e-01, PNorm = 64.0173, GNorm = 0.7123, lr_0 = 6.8923e-04
Loss = 1.5826e-01, PNorm = 64.0424, GNorm = 1.5169, lr_0 = 6.8876e-04
Loss = 1.5387e-01, PNorm = 64.0575, GNorm = 0.9987, lr_0 = 6.8828e-04
Loss = 1.5467e-01, PNorm = 64.0780, GNorm = 0.7455, lr_0 = 6.8781e-04
Loss = 1.5946e-01, PNorm = 64.1001, GNorm = 0.7503, lr_0 = 6.8734e-04
Loss = 1.5097e-01, PNorm = 64.1243, GNorm = 1.1350, lr_0 = 6.8687e-04
Loss = 1.6425e-01, PNorm = 64.1525, GNorm = 1.2946, lr_0 = 6.8640e-04
Loss = 1.5963e-01, PNorm = 64.1781, GNorm = 0.9614, lr_0 = 6.8593e-04
Loss = 1.5341e-01, PNorm = 64.1983, GNorm = 0.3300, lr_0 = 6.8546e-04
Loss = 1.8466e-01, PNorm = 64.2236, GNorm = 0.6808, lr_0 = 6.8499e-04
Loss = 1.6785e-01, PNorm = 64.2465, GNorm = 1.4456, lr_0 = 6.8452e-04
Loss = 1.8316e-01, PNorm = 64.2666, GNorm = 0.8869, lr_0 = 6.8405e-04
Loss = 1.7148e-01, PNorm = 64.2864, GNorm = 0.6403, lr_0 = 6.8358e-04
Loss = 1.7540e-01, PNorm = 64.3092, GNorm = 0.5998, lr_0 = 6.8312e-04
Loss = 1.4328e-01, PNorm = 64.3391, GNorm = 0.7173, lr_0 = 6.8265e-04
Loss = 1.8135e-01, PNorm = 64.3623, GNorm = 0.7095, lr_0 = 6.8218e-04
Loss = 1.7161e-01, PNorm = 64.3902, GNorm = 0.9048, lr_0 = 6.8171e-04
Loss = 1.5328e-01, PNorm = 64.4135, GNorm = 0.8318, lr_0 = 6.8125e-04
Loss = 1.6876e-01, PNorm = 64.4355, GNorm = 0.7554, lr_0 = 6.8078e-04
Loss = 1.8272e-01, PNorm = 64.4631, GNorm = 1.1279, lr_0 = 6.8031e-04
Loss = 1.5407e-01, PNorm = 64.4897, GNorm = 1.1973, lr_0 = 6.7985e-04
Loss = 1.6165e-01, PNorm = 64.5074, GNorm = 0.5698, lr_0 = 6.7938e-04
Loss = 1.7447e-01, PNorm = 64.5302, GNorm = 0.8266, lr_0 = 6.7892e-04
Loss = 1.7402e-01, PNorm = 64.5547, GNorm = 0.8351, lr_0 = 6.7845e-04
Loss = 1.5627e-01, PNorm = 64.5791, GNorm = 0.8229, lr_0 = 6.7799e-04
Loss = 1.4206e-01, PNorm = 64.5997, GNorm = 0.5555, lr_0 = 6.7752e-04
Loss = 1.7109e-01, PNorm = 64.6186, GNorm = 2.0953, lr_0 = 6.7706e-04
Loss = 1.5603e-01, PNorm = 64.6368, GNorm = 0.9204, lr_0 = 6.7659e-04
Loss = 1.7162e-01, PNorm = 64.6564, GNorm = 1.1881, lr_0 = 6.7613e-04
Loss = 1.6803e-01, PNorm = 64.6734, GNorm = 0.5820, lr_0 = 6.7567e-04
Loss = 1.5883e-01, PNorm = 64.6948, GNorm = 0.6363, lr_0 = 6.7520e-04
Loss = 1.7097e-01, PNorm = 64.7189, GNorm = 0.8236, lr_0 = 6.7474e-04
Loss = 1.4178e-01, PNorm = 64.7377, GNorm = 0.8496, lr_0 = 6.7428e-04
Loss = 1.5674e-01, PNorm = 64.7502, GNorm = 0.6805, lr_0 = 6.7382e-04
Loss = 1.6585e-01, PNorm = 64.7738, GNorm = 1.1731, lr_0 = 6.7335e-04
Loss = 1.5247e-01, PNorm = 64.8006, GNorm = 0.7704, lr_0 = 6.7289e-04
Loss = 1.6970e-01, PNorm = 64.8257, GNorm = 1.2986, lr_0 = 6.7243e-04
Loss = 1.8218e-01, PNorm = 64.8511, GNorm = 0.9833, lr_0 = 6.7197e-04
Loss = 1.6244e-01, PNorm = 64.8660, GNorm = 0.6850, lr_0 = 6.7151e-04
Loss = 1.8048e-01, PNorm = 64.8908, GNorm = 0.7326, lr_0 = 6.7105e-04
Loss = 1.7551e-01, PNorm = 64.9197, GNorm = 0.8486, lr_0 = 6.7059e-04
Loss = 1.6652e-01, PNorm = 64.9462, GNorm = 0.6114, lr_0 = 6.7013e-04
Loss = 1.4743e-01, PNorm = 64.9740, GNorm = 1.0878, lr_0 = 6.6967e-04
Loss = 1.7850e-01, PNorm = 64.9973, GNorm = 0.9618, lr_0 = 6.6921e-04
Loss = 1.6492e-01, PNorm = 65.0182, GNorm = 1.0171, lr_0 = 6.6876e-04
Loss = 1.9526e-01, PNorm = 65.0398, GNorm = 1.0076, lr_0 = 6.6830e-04
Loss = 1.7773e-01, PNorm = 65.0699, GNorm = 1.9764, lr_0 = 6.6784e-04
Loss = 1.6943e-01, PNorm = 65.0928, GNorm = 0.7742, lr_0 = 6.6738e-04
Loss = 1.6761e-01, PNorm = 65.1119, GNorm = 0.8750, lr_0 = 6.6693e-04
Loss = 1.6715e-01, PNorm = 65.1347, GNorm = 0.5228, lr_0 = 6.6647e-04
Loss = 1.5134e-01, PNorm = 65.1601, GNorm = 1.2987, lr_0 = 6.6601e-04
Loss = 1.7343e-01, PNorm = 65.1771, GNorm = 0.9299, lr_0 = 6.6556e-04
Loss = 1.5201e-01, PNorm = 65.1942, GNorm = 1.0547, lr_0 = 6.6510e-04
Loss = 1.7528e-01, PNorm = 65.2077, GNorm = 0.9126, lr_0 = 6.6464e-04
Loss = 1.6771e-01, PNorm = 65.2218, GNorm = 0.6651, lr_0 = 6.6419e-04
Loss = 1.6922e-01, PNorm = 65.2435, GNorm = 1.1216, lr_0 = 6.6373e-04
Loss = 1.8959e-01, PNorm = 65.2665, GNorm = 0.8952, lr_0 = 6.6328e-04
Loss = 1.6403e-01, PNorm = 65.2914, GNorm = 1.6838, lr_0 = 6.6282e-04
Validation mae = 0.242868
Epoch 7
Loss = 1.7236e-01, PNorm = 65.3140, GNorm = 0.7104, lr_0 = 6.6237e-04
Loss = 1.7714e-01, PNorm = 65.3418, GNorm = 1.1831, lr_0 = 6.6192e-04
Loss = 1.7792e-01, PNorm = 65.3755, GNorm = 1.0143, lr_0 = 6.6146e-04
Loss = 1.5662e-01, PNorm = 65.4050, GNorm = 0.8219, lr_0 = 6.6101e-04
Loss = 1.5594e-01, PNorm = 65.4262, GNorm = 0.6467, lr_0 = 6.6056e-04
Loss = 1.4244e-01, PNorm = 65.4456, GNorm = 0.6592, lr_0 = 6.6011e-04
Loss = 1.5697e-01, PNorm = 65.4686, GNorm = 0.8527, lr_0 = 6.5965e-04
Loss = 1.4490e-01, PNorm = 65.4929, GNorm = 0.5282, lr_0 = 6.5920e-04
Loss = 1.4810e-01, PNorm = 65.5099, GNorm = 0.8013, lr_0 = 6.5875e-04
Loss = 1.6540e-01, PNorm = 65.5281, GNorm = 0.6481, lr_0 = 6.5830e-04
Loss = 1.6955e-01, PNorm = 65.5538, GNorm = 1.0180, lr_0 = 6.5785e-04
Loss = 1.6285e-01, PNorm = 65.5686, GNorm = 0.7611, lr_0 = 6.5740e-04
Loss = 1.4060e-01, PNorm = 65.5912, GNorm = 0.7026, lr_0 = 6.5695e-04
Loss = 1.7046e-01, PNorm = 65.6107, GNorm = 0.6477, lr_0 = 6.5650e-04
Loss = 1.4497e-01, PNorm = 65.6333, GNorm = 0.7696, lr_0 = 6.5605e-04
Loss = 1.6186e-01, PNorm = 65.6569, GNorm = 0.6046, lr_0 = 6.5560e-04
Loss = 1.5871e-01, PNorm = 65.6746, GNorm = 0.7390, lr_0 = 6.5515e-04
Loss = 1.5617e-01, PNorm = 65.6880, GNorm = 1.3005, lr_0 = 6.5470e-04
Loss = 1.4871e-01, PNorm = 65.7052, GNorm = 0.6291, lr_0 = 6.5425e-04
Loss = 1.5534e-01, PNorm = 65.7257, GNorm = 0.8602, lr_0 = 6.5380e-04
Loss = 1.6002e-01, PNorm = 65.7453, GNorm = 0.8068, lr_0 = 6.5335e-04
Loss = 1.4971e-01, PNorm = 65.7629, GNorm = 0.9487, lr_0 = 6.5291e-04
Loss = 1.5380e-01, PNorm = 65.7892, GNorm = 1.0271, lr_0 = 6.5246e-04
Loss = 1.6336e-01, PNorm = 65.8132, GNorm = 1.0277, lr_0 = 6.5201e-04
Loss = 1.6722e-01, PNorm = 65.8344, GNorm = 1.7821, lr_0 = 6.5157e-04
Loss = 1.5789e-01, PNorm = 65.8552, GNorm = 0.6673, lr_0 = 6.5112e-04
Loss = 1.7441e-01, PNorm = 65.8750, GNorm = 0.8667, lr_0 = 6.5067e-04
Loss = 1.7693e-01, PNorm = 65.9031, GNorm = 1.7507, lr_0 = 6.5023e-04
Loss = 1.6406e-01, PNorm = 65.9286, GNorm = 1.7674, lr_0 = 6.4978e-04
Loss = 1.7528e-01, PNorm = 65.9473, GNorm = 0.8979, lr_0 = 6.4934e-04
Loss = 1.3578e-01, PNorm = 65.9609, GNorm = 1.1737, lr_0 = 6.4889e-04
Loss = 1.6081e-01, PNorm = 65.9770, GNorm = 0.6209, lr_0 = 6.4845e-04
Loss = 1.5252e-01, PNorm = 65.9954, GNorm = 0.5997, lr_0 = 6.4800e-04
Loss = 1.4240e-01, PNorm = 66.0184, GNorm = 0.9435, lr_0 = 6.4756e-04
Loss = 1.5020e-01, PNorm = 66.0372, GNorm = 0.7523, lr_0 = 6.4712e-04
Loss = 1.7779e-01, PNorm = 66.0695, GNorm = 0.6627, lr_0 = 6.4667e-04
Loss = 1.6346e-01, PNorm = 66.0926, GNorm = 1.0294, lr_0 = 6.4623e-04
Loss = 1.4480e-01, PNorm = 66.1060, GNorm = 0.6186, lr_0 = 6.4579e-04
Loss = 1.5376e-01, PNorm = 66.1200, GNorm = 0.8917, lr_0 = 6.4534e-04
Loss = 1.5301e-01, PNorm = 66.1349, GNorm = 0.6340, lr_0 = 6.4490e-04
Loss = 1.7089e-01, PNorm = 66.1565, GNorm = 0.7767, lr_0 = 6.4446e-04
Loss = 1.4783e-01, PNorm = 66.1771, GNorm = 1.1771, lr_0 = 6.4402e-04
Loss = 1.5899e-01, PNorm = 66.2032, GNorm = 1.0879, lr_0 = 6.4358e-04
Loss = 1.5068e-01, PNorm = 66.2208, GNorm = 1.0601, lr_0 = 6.4314e-04
Loss = 1.5323e-01, PNorm = 66.2370, GNorm = 0.8280, lr_0 = 6.4270e-04
Loss = 1.5198e-01, PNorm = 66.2540, GNorm = 0.6597, lr_0 = 6.4226e-04
Loss = 1.4626e-01, PNorm = 66.2705, GNorm = 0.5210, lr_0 = 6.4182e-04
Loss = 1.5545e-01, PNorm = 66.2901, GNorm = 0.9334, lr_0 = 6.4138e-04
Loss = 1.2446e-01, PNorm = 66.3115, GNorm = 0.6321, lr_0 = 6.4094e-04
Loss = 1.4965e-01, PNorm = 66.3327, GNorm = 1.0783, lr_0 = 6.4050e-04
Loss = 1.6468e-01, PNorm = 66.3577, GNorm = 0.9306, lr_0 = 6.4006e-04
Loss = 1.5759e-01, PNorm = 66.3860, GNorm = 0.6113, lr_0 = 6.3962e-04
Loss = 1.7475e-01, PNorm = 66.3969, GNorm = 0.6140, lr_0 = 6.3918e-04
Loss = 1.9802e-01, PNorm = 66.4148, GNorm = 1.0678, lr_0 = 6.3874e-04
Loss = 1.6363e-01, PNorm = 66.4329, GNorm = 0.4987, lr_0 = 6.3831e-04
Loss = 1.7124e-01, PNorm = 66.4450, GNorm = 1.3414, lr_0 = 6.3787e-04
Loss = 1.4666e-01, PNorm = 66.4715, GNorm = 0.9636, lr_0 = 6.3743e-04
Loss = 1.6708e-01, PNorm = 66.4879, GNorm = 1.0041, lr_0 = 6.3700e-04
Loss = 1.6243e-01, PNorm = 66.5189, GNorm = 0.8620, lr_0 = 6.3656e-04
Loss = 1.6726e-01, PNorm = 66.5509, GNorm = 0.7986, lr_0 = 6.3612e-04
Loss = 1.6517e-01, PNorm = 66.5835, GNorm = 1.5044, lr_0 = 6.3569e-04
Loss = 1.4713e-01, PNorm = 66.6076, GNorm = 1.2041, lr_0 = 6.3525e-04
Loss = 1.5637e-01, PNorm = 66.6267, GNorm = 0.6067, lr_0 = 6.3482e-04
Loss = 1.4747e-01, PNorm = 66.6399, GNorm = 1.2181, lr_0 = 6.3438e-04
Loss = 1.5964e-01, PNorm = 66.6606, GNorm = 0.6587, lr_0 = 6.3395e-04
Loss = 1.4811e-01, PNorm = 66.6874, GNorm = 0.6955, lr_0 = 6.3351e-04
Loss = 1.6350e-01, PNorm = 66.7114, GNorm = 0.6948, lr_0 = 6.3308e-04
Loss = 1.5624e-01, PNorm = 66.7367, GNorm = 0.8577, lr_0 = 6.3265e-04
Loss = 1.7186e-01, PNorm = 66.7615, GNorm = 1.3405, lr_0 = 6.3221e-04
Loss = 1.7253e-01, PNorm = 66.7876, GNorm = 1.1740, lr_0 = 6.3178e-04
Loss = 1.5710e-01, PNorm = 66.8067, GNorm = 1.0034, lr_0 = 6.3135e-04
Loss = 1.4461e-01, PNorm = 66.8346, GNorm = 0.7683, lr_0 = 6.3091e-04
Loss = 1.4379e-01, PNorm = 66.8500, GNorm = 0.6301, lr_0 = 6.3048e-04
Loss = 1.4095e-01, PNorm = 66.8713, GNorm = 0.6122, lr_0 = 6.3005e-04
Loss = 1.7235e-01, PNorm = 66.8885, GNorm = 1.1371, lr_0 = 6.2962e-04
Loss = 1.6612e-01, PNorm = 66.9102, GNorm = 0.5970, lr_0 = 6.2919e-04
Loss = 1.6496e-01, PNorm = 66.9278, GNorm = 0.7883, lr_0 = 6.2876e-04
Loss = 1.4615e-01, PNorm = 66.9439, GNorm = 1.2688, lr_0 = 6.2833e-04
Loss = 1.4750e-01, PNorm = 66.9605, GNorm = 0.7005, lr_0 = 6.2789e-04
Loss = 1.6029e-01, PNorm = 66.9863, GNorm = 1.0073, lr_0 = 6.2746e-04
Loss = 1.6981e-01, PNorm = 67.0020, GNorm = 1.1041, lr_0 = 6.2703e-04
Loss = 1.5583e-01, PNorm = 67.0224, GNorm = 0.6493, lr_0 = 6.2661e-04
Loss = 1.4425e-01, PNorm = 67.0347, GNorm = 0.7006, lr_0 = 6.2618e-04
Loss = 1.4010e-01, PNorm = 67.0483, GNorm = 0.9937, lr_0 = 6.2575e-04
Loss = 1.5349e-01, PNorm = 67.0604, GNorm = 1.3284, lr_0 = 6.2532e-04
Loss = 1.8178e-01, PNorm = 67.0822, GNorm = 1.1977, lr_0 = 6.2489e-04
Loss = 1.5734e-01, PNorm = 67.0995, GNorm = 1.2163, lr_0 = 6.2446e-04
Loss = 1.6711e-01, PNorm = 67.1267, GNorm = 0.6042, lr_0 = 6.2403e-04
Loss = 1.5345e-01, PNorm = 67.1472, GNorm = 0.7022, lr_0 = 6.2361e-04
Loss = 1.5393e-01, PNorm = 67.1723, GNorm = 0.8431, lr_0 = 6.2318e-04
Loss = 1.7605e-01, PNorm = 67.1878, GNorm = 1.0455, lr_0 = 6.2275e-04
Loss = 1.6491e-01, PNorm = 67.2141, GNorm = 1.0301, lr_0 = 6.2233e-04
Loss = 1.5608e-01, PNorm = 67.2372, GNorm = 1.1227, lr_0 = 6.2190e-04
Loss = 1.5796e-01, PNorm = 67.2557, GNorm = 0.8347, lr_0 = 6.2147e-04
Loss = 1.6259e-01, PNorm = 67.2742, GNorm = 1.0117, lr_0 = 6.2105e-04
Loss = 1.5365e-01, PNorm = 67.2963, GNorm = 0.6728, lr_0 = 6.2062e-04
Loss = 1.6322e-01, PNorm = 67.3191, GNorm = 0.6341, lr_0 = 6.2020e-04
Loss = 1.8770e-01, PNorm = 67.3377, GNorm = 0.9842, lr_0 = 6.1977e-04
Loss = 1.7007e-01, PNorm = 67.3618, GNorm = 1.4739, lr_0 = 6.1935e-04
Loss = 1.5535e-01, PNorm = 67.3919, GNorm = 0.5752, lr_0 = 6.1892e-04
Loss = 1.5970e-01, PNorm = 67.4150, GNorm = 0.8642, lr_0 = 6.1850e-04
Loss = 1.5567e-01, PNorm = 67.4321, GNorm = 1.1923, lr_0 = 6.1808e-04
Loss = 1.5871e-01, PNorm = 67.4479, GNorm = 1.3822, lr_0 = 6.1765e-04
Loss = 1.5063e-01, PNorm = 67.4619, GNorm = 0.9099, lr_0 = 6.1723e-04
Loss = 1.5953e-01, PNorm = 67.4843, GNorm = 0.8126, lr_0 = 6.1681e-04
Loss = 1.6942e-01, PNorm = 67.5032, GNorm = 0.8118, lr_0 = 6.1638e-04
Loss = 1.5196e-01, PNorm = 67.5270, GNorm = 1.0385, lr_0 = 6.1596e-04
Loss = 1.6127e-01, PNorm = 67.5344, GNorm = 0.8487, lr_0 = 6.1554e-04
Loss = 1.6633e-01, PNorm = 67.5457, GNorm = 1.0386, lr_0 = 6.1512e-04
Loss = 1.5158e-01, PNorm = 67.5595, GNorm = 0.8413, lr_0 = 6.1470e-04
Loss = 1.3295e-01, PNorm = 67.5826, GNorm = 0.9640, lr_0 = 6.1428e-04
Loss = 1.6398e-01, PNorm = 67.5978, GNorm = 0.6776, lr_0 = 6.1385e-04
Loss = 1.4413e-01, PNorm = 67.6172, GNorm = 1.5529, lr_0 = 6.1343e-04
Loss = 1.6732e-01, PNorm = 67.6436, GNorm = 0.7927, lr_0 = 6.1301e-04
Loss = 1.4140e-01, PNorm = 67.6636, GNorm = 1.0782, lr_0 = 6.1259e-04
Loss = 1.4680e-01, PNorm = 67.6828, GNorm = 0.5404, lr_0 = 6.1217e-04
Loss = 1.5560e-01, PNorm = 67.7004, GNorm = 1.2277, lr_0 = 6.1175e-04
Loss = 1.5601e-01, PNorm = 67.7171, GNorm = 0.8241, lr_0 = 6.1134e-04
Loss = 1.6798e-01, PNorm = 67.7314, GNorm = 0.8698, lr_0 = 6.1092e-04
Loss = 1.5644e-01, PNorm = 67.7455, GNorm = 1.9665, lr_0 = 6.1050e-04
Validation mae = 0.245824
Epoch 8
Loss = 1.2951e-01, PNorm = 67.7593, GNorm = 0.7588, lr_0 = 6.1008e-04
Loss = 1.5188e-01, PNorm = 67.7831, GNorm = 0.9460, lr_0 = 6.0966e-04
Loss = 1.4714e-01, PNorm = 67.7994, GNorm = 0.9434, lr_0 = 6.0924e-04
Loss = 1.4128e-01, PNorm = 67.8190, GNorm = 0.5521, lr_0 = 6.0883e-04
Loss = 1.4604e-01, PNorm = 67.8346, GNorm = 0.9670, lr_0 = 6.0841e-04
Loss = 1.4382e-01, PNorm = 67.8573, GNorm = 1.8561, lr_0 = 6.0799e-04
Loss = 1.3401e-01, PNorm = 67.8704, GNorm = 0.8540, lr_0 = 6.0758e-04
Loss = 1.4597e-01, PNorm = 67.8901, GNorm = 1.5049, lr_0 = 6.0716e-04
Loss = 1.4054e-01, PNorm = 67.9088, GNorm = 0.5498, lr_0 = 6.0674e-04
Loss = 1.3705e-01, PNorm = 67.9269, GNorm = 0.6872, lr_0 = 6.0633e-04
Loss = 1.2732e-01, PNorm = 67.9459, GNorm = 0.5154, lr_0 = 6.0591e-04
Loss = 1.4731e-01, PNorm = 67.9588, GNorm = 0.7268, lr_0 = 6.0550e-04
Loss = 1.5887e-01, PNorm = 67.9767, GNorm = 0.6983, lr_0 = 6.0508e-04
Loss = 1.4807e-01, PNorm = 68.0015, GNorm = 1.0379, lr_0 = 6.0467e-04
Loss = 1.5836e-01, PNorm = 68.0268, GNorm = 1.1452, lr_0 = 6.0425e-04
Loss = 1.4428e-01, PNorm = 68.0474, GNorm = 0.7076, lr_0 = 6.0384e-04
Loss = 1.6507e-01, PNorm = 68.0746, GNorm = 1.3957, lr_0 = 6.0343e-04
Loss = 1.4641e-01, PNorm = 68.0960, GNorm = 1.0681, lr_0 = 6.0301e-04
Loss = 1.4681e-01, PNorm = 68.1187, GNorm = 0.7539, lr_0 = 6.0260e-04
Loss = 1.3867e-01, PNorm = 68.1333, GNorm = 0.6132, lr_0 = 6.0219e-04
Loss = 1.3887e-01, PNorm = 68.1511, GNorm = 0.6405, lr_0 = 6.0178e-04
Loss = 1.4802e-01, PNorm = 68.1702, GNorm = 0.6333, lr_0 = 6.0136e-04
Loss = 1.8552e-01, PNorm = 68.1834, GNorm = 0.8557, lr_0 = 6.0095e-04
Loss = 1.4022e-01, PNorm = 68.2028, GNorm = 1.0922, lr_0 = 6.0054e-04
Loss = 1.7368e-01, PNorm = 68.2217, GNorm = 0.9481, lr_0 = 6.0013e-04
Loss = 1.5142e-01, PNorm = 68.2447, GNorm = 0.6267, lr_0 = 5.9972e-04
Loss = 1.4758e-01, PNorm = 68.2651, GNorm = 1.2460, lr_0 = 5.9931e-04
Loss = 1.3381e-01, PNorm = 68.2866, GNorm = 0.9277, lr_0 = 5.9890e-04
Loss = 1.5878e-01, PNorm = 68.3079, GNorm = 0.5438, lr_0 = 5.9849e-04
Loss = 1.6166e-01, PNorm = 68.3314, GNorm = 0.5803, lr_0 = 5.9808e-04
Loss = 1.4587e-01, PNorm = 68.3606, GNorm = 0.9499, lr_0 = 5.9767e-04
Loss = 1.3184e-01, PNorm = 68.3822, GNorm = 0.7604, lr_0 = 5.9726e-04
Loss = 1.5684e-01, PNorm = 68.4041, GNorm = 0.4880, lr_0 = 5.9685e-04
Loss = 1.4619e-01, PNorm = 68.4188, GNorm = 0.8259, lr_0 = 5.9644e-04
Loss = 1.5172e-01, PNorm = 68.4434, GNorm = 0.6504, lr_0 = 5.9603e-04
Loss = 1.5233e-01, PNorm = 68.4669, GNorm = 0.6645, lr_0 = 5.9562e-04
Loss = 1.4540e-01, PNorm = 68.4823, GNorm = 0.4827, lr_0 = 5.9521e-04
Loss = 1.4574e-01, PNorm = 68.4917, GNorm = 0.9622, lr_0 = 5.9481e-04
Loss = 1.4779e-01, PNorm = 68.5071, GNorm = 0.5968, lr_0 = 5.9440e-04
Loss = 1.8627e-01, PNorm = 68.5270, GNorm = 1.1950, lr_0 = 5.9399e-04
Loss = 1.7561e-01, PNorm = 68.5549, GNorm = 1.4331, lr_0 = 5.9358e-04
Loss = 1.4749e-01, PNorm = 68.5796, GNorm = 0.9320, lr_0 = 5.9318e-04
Loss = 1.5939e-01, PNorm = 68.6030, GNorm = 1.0679, lr_0 = 5.9277e-04
Loss = 1.3534e-01, PNorm = 68.6283, GNorm = 1.0259, lr_0 = 5.9236e-04
Loss = 1.5197e-01, PNorm = 68.6469, GNorm = 0.8615, lr_0 = 5.9196e-04
Loss = 1.6205e-01, PNorm = 68.6675, GNorm = 1.1482, lr_0 = 5.9155e-04
Loss = 1.6820e-01, PNorm = 68.6912, GNorm = 1.0773, lr_0 = 5.9115e-04
Loss = 1.7263e-01, PNorm = 68.7171, GNorm = 0.7233, lr_0 = 5.9074e-04
Loss = 1.4948e-01, PNorm = 68.7362, GNorm = 0.8674, lr_0 = 5.9034e-04
Loss = 1.6334e-01, PNorm = 68.7573, GNorm = 0.6354, lr_0 = 5.8993e-04
Loss = 1.7174e-01, PNorm = 68.7764, GNorm = 0.7722, lr_0 = 5.8953e-04
Loss = 1.6827e-01, PNorm = 68.7946, GNorm = 0.6604, lr_0 = 5.8913e-04
Loss = 1.4901e-01, PNorm = 68.8113, GNorm = 0.8183, lr_0 = 5.8872e-04
Loss = 1.5559e-01, PNorm = 68.8306, GNorm = 0.9775, lr_0 = 5.8832e-04
Loss = 1.4761e-01, PNorm = 68.8560, GNorm = 0.8829, lr_0 = 5.8792e-04
Loss = 1.6350e-01, PNorm = 68.8730, GNorm = 0.6861, lr_0 = 5.8751e-04
Loss = 1.2902e-01, PNorm = 68.8916, GNorm = 0.7972, lr_0 = 5.8711e-04
Loss = 1.5233e-01, PNorm = 68.9104, GNorm = 0.6212, lr_0 = 5.8671e-04
Loss = 1.4281e-01, PNorm = 68.9311, GNorm = 0.6832, lr_0 = 5.8631e-04
Loss = 1.5107e-01, PNorm = 68.9498, GNorm = 0.6556, lr_0 = 5.8591e-04
Loss = 1.4886e-01, PNorm = 68.9694, GNorm = 0.8679, lr_0 = 5.8550e-04
Loss = 1.3196e-01, PNorm = 68.9875, GNorm = 0.8170, lr_0 = 5.8510e-04
Loss = 1.3864e-01, PNorm = 68.9972, GNorm = 0.6357, lr_0 = 5.8470e-04
Loss = 1.3878e-01, PNorm = 69.0165, GNorm = 1.0338, lr_0 = 5.8430e-04
Loss = 1.4403e-01, PNorm = 69.0371, GNorm = 0.9683, lr_0 = 5.8390e-04
Loss = 1.4336e-01, PNorm = 69.0541, GNorm = 0.5409, lr_0 = 5.8350e-04
Loss = 1.4935e-01, PNorm = 69.0668, GNorm = 1.1648, lr_0 = 5.8310e-04
Loss = 1.5777e-01, PNorm = 69.0858, GNorm = 1.1004, lr_0 = 5.8270e-04
Loss = 1.4489e-01, PNorm = 69.1058, GNorm = 0.6116, lr_0 = 5.8230e-04
Loss = 1.2546e-01, PNorm = 69.1271, GNorm = 0.6048, lr_0 = 5.8190e-04
Loss = 1.4344e-01, PNorm = 69.1434, GNorm = 0.7480, lr_0 = 5.8151e-04
Loss = 1.3021e-01, PNorm = 69.1552, GNorm = 0.5977, lr_0 = 5.8111e-04
Loss = 1.5275e-01, PNorm = 69.1748, GNorm = 0.7145, lr_0 = 5.8071e-04
Loss = 1.4843e-01, PNorm = 69.1967, GNorm = 0.6937, lr_0 = 5.8031e-04
Loss = 1.4945e-01, PNorm = 69.2114, GNorm = 0.9389, lr_0 = 5.7991e-04
Loss = 1.4509e-01, PNorm = 69.2240, GNorm = 0.4582, lr_0 = 5.7952e-04
Loss = 1.6911e-01, PNorm = 69.2382, GNorm = 0.9121, lr_0 = 5.7912e-04
Loss = 1.5373e-01, PNorm = 69.2484, GNorm = 1.0193, lr_0 = 5.7872e-04
Loss = 1.5877e-01, PNorm = 69.2617, GNorm = 0.7083, lr_0 = 5.7833e-04
Loss = 1.6504e-01, PNorm = 69.2814, GNorm = 0.7332, lr_0 = 5.7793e-04
Loss = 1.4375e-01, PNorm = 69.3072, GNorm = 1.2914, lr_0 = 5.7753e-04
Loss = 1.4408e-01, PNorm = 69.3291, GNorm = 0.5871, lr_0 = 5.7714e-04
Loss = 1.3251e-01, PNorm = 69.3522, GNorm = 0.5987, lr_0 = 5.7674e-04
Loss = 1.3309e-01, PNorm = 69.3710, GNorm = 1.1099, lr_0 = 5.7635e-04
Loss = 1.5761e-01, PNorm = 69.3852, GNorm = 0.6389, lr_0 = 5.7595e-04
Loss = 1.4661e-01, PNorm = 69.3974, GNorm = 0.8314, lr_0 = 5.7556e-04
Loss = 1.4040e-01, PNorm = 69.4180, GNorm = 0.9852, lr_0 = 5.7516e-04
Loss = 1.4739e-01, PNorm = 69.4273, GNorm = 0.8726, lr_0 = 5.7477e-04
Loss = 1.4613e-01, PNorm = 69.4380, GNorm = 0.9755, lr_0 = 5.7438e-04
Loss = 1.5977e-01, PNorm = 69.4545, GNorm = 0.5383, lr_0 = 5.7398e-04
Loss = 1.5550e-01, PNorm = 69.4700, GNorm = 1.0040, lr_0 = 5.7359e-04
Loss = 1.3286e-01, PNorm = 69.4817, GNorm = 0.8575, lr_0 = 5.7320e-04
Loss = 1.3449e-01, PNorm = 69.4927, GNorm = 1.1443, lr_0 = 5.7280e-04
Loss = 1.4266e-01, PNorm = 69.5130, GNorm = 0.7164, lr_0 = 5.7241e-04
Loss = 1.3566e-01, PNorm = 69.5378, GNorm = 1.4339, lr_0 = 5.7202e-04
Loss = 1.6257e-01, PNorm = 69.5581, GNorm = 1.1317, lr_0 = 5.7163e-04
Loss = 1.2634e-01, PNorm = 69.5712, GNorm = 0.9775, lr_0 = 5.7124e-04
Loss = 1.5882e-01, PNorm = 69.5877, GNorm = 0.7019, lr_0 = 5.7084e-04
Loss = 1.5658e-01, PNorm = 69.6064, GNorm = 1.3947, lr_0 = 5.7045e-04
Loss = 1.4807e-01, PNorm = 69.6257, GNorm = 1.3424, lr_0 = 5.7006e-04
Loss = 1.6708e-01, PNorm = 69.6491, GNorm = 0.7150, lr_0 = 5.6967e-04
Loss = 1.4737e-01, PNorm = 69.6712, GNorm = 1.0274, lr_0 = 5.6928e-04
Loss = 1.5316e-01, PNorm = 69.6886, GNorm = 1.3293, lr_0 = 5.6889e-04
Loss = 1.3091e-01, PNorm = 69.7055, GNorm = 1.0021, lr_0 = 5.6850e-04
Loss = 1.5721e-01, PNorm = 69.7170, GNorm = 1.2050, lr_0 = 5.6811e-04
Loss = 1.4889e-01, PNorm = 69.7240, GNorm = 0.6136, lr_0 = 5.6772e-04
Loss = 1.6637e-01, PNorm = 69.7372, GNorm = 1.1044, lr_0 = 5.6733e-04
Loss = 1.5432e-01, PNorm = 69.7561, GNorm = 0.5495, lr_0 = 5.6695e-04
Loss = 1.6526e-01, PNorm = 69.7710, GNorm = 0.9712, lr_0 = 5.6656e-04
Loss = 1.5477e-01, PNorm = 69.7910, GNorm = 0.9159, lr_0 = 5.6617e-04
Loss = 1.4663e-01, PNorm = 69.8008, GNorm = 0.6896, lr_0 = 5.6578e-04
Loss = 1.4856e-01, PNorm = 69.8223, GNorm = 0.5717, lr_0 = 5.6539e-04
Loss = 1.3538e-01, PNorm = 69.8354, GNorm = 0.5666, lr_0 = 5.6501e-04
Loss = 1.4202e-01, PNorm = 69.8527, GNorm = 0.7505, lr_0 = 5.6462e-04
Loss = 1.4698e-01, PNorm = 69.8660, GNorm = 1.0702, lr_0 = 5.6423e-04
Loss = 1.3598e-01, PNorm = 69.8815, GNorm = 0.8715, lr_0 = 5.6385e-04
Loss = 1.6516e-01, PNorm = 69.8973, GNorm = 0.6621, lr_0 = 5.6346e-04
Loss = 1.5349e-01, PNorm = 69.9122, GNorm = 0.9452, lr_0 = 5.6307e-04
Loss = 1.4828e-01, PNorm = 69.9270, GNorm = 0.8517, lr_0 = 5.6269e-04
Loss = 1.5091e-01, PNorm = 69.9425, GNorm = 0.5595, lr_0 = 5.6230e-04
Validation mae = 0.244407
Epoch 9
Loss = 1.5484e-01, PNorm = 69.9589, GNorm = 0.6947, lr_0 = 5.6192e-04
Loss = 1.4001e-01, PNorm = 69.9774, GNorm = 1.3807, lr_0 = 5.6153e-04
Loss = 1.2890e-01, PNorm = 69.9900, GNorm = 0.9669, lr_0 = 5.6115e-04
Loss = 1.1932e-01, PNorm = 70.0032, GNorm = 0.6467, lr_0 = 5.6076e-04
Loss = 1.3019e-01, PNorm = 70.0191, GNorm = 0.6868, lr_0 = 5.6038e-04
Loss = 1.4450e-01, PNorm = 70.0353, GNorm = 0.6341, lr_0 = 5.6000e-04
Loss = 1.4239e-01, PNorm = 70.0480, GNorm = 0.7379, lr_0 = 5.5961e-04
Loss = 1.3089e-01, PNorm = 70.0632, GNorm = 1.0963, lr_0 = 5.5923e-04
Loss = 1.3381e-01, PNorm = 70.0741, GNorm = 0.7751, lr_0 = 5.5885e-04
Loss = 1.1889e-01, PNorm = 70.0887, GNorm = 0.8358, lr_0 = 5.5846e-04
Loss = 1.3095e-01, PNorm = 70.1024, GNorm = 0.6713, lr_0 = 5.5808e-04
Loss = 1.4120e-01, PNorm = 70.1240, GNorm = 0.5683, lr_0 = 5.5770e-04
Loss = 1.3798e-01, PNorm = 70.1499, GNorm = 0.5015, lr_0 = 5.5732e-04
Loss = 1.2546e-01, PNorm = 70.1699, GNorm = 1.0173, lr_0 = 5.5693e-04
Loss = 1.4799e-01, PNorm = 70.1875, GNorm = 1.3307, lr_0 = 5.5655e-04
Loss = 1.4697e-01, PNorm = 70.2038, GNorm = 0.8668, lr_0 = 5.5617e-04
Loss = 1.3093e-01, PNorm = 70.2212, GNorm = 0.6592, lr_0 = 5.5579e-04
Loss = 1.3179e-01, PNorm = 70.2366, GNorm = 0.5882, lr_0 = 5.5541e-04
Loss = 1.3442e-01, PNorm = 70.2571, GNorm = 0.5605, lr_0 = 5.5503e-04
Loss = 1.4426e-01, PNorm = 70.2830, GNorm = 0.8243, lr_0 = 5.5465e-04
Loss = 1.2838e-01, PNorm = 70.2992, GNorm = 0.6118, lr_0 = 5.5427e-04
Loss = 1.5240e-01, PNorm = 70.3158, GNorm = 1.1055, lr_0 = 5.5389e-04
Loss = 1.2109e-01, PNorm = 70.3377, GNorm = 0.7286, lr_0 = 5.5351e-04
Loss = 1.4587e-01, PNorm = 70.3556, GNorm = 0.7302, lr_0 = 5.5313e-04
Loss = 1.5540e-01, PNorm = 70.3735, GNorm = 1.1492, lr_0 = 5.5275e-04
Loss = 1.5883e-01, PNorm = 70.3858, GNorm = 0.9288, lr_0 = 5.5237e-04
Loss = 1.6339e-01, PNorm = 70.4008, GNorm = 0.8838, lr_0 = 5.5199e-04
Loss = 1.3658e-01, PNorm = 70.4232, GNorm = 0.5512, lr_0 = 5.5162e-04
Loss = 1.3411e-01, PNorm = 70.4369, GNorm = 0.9245, lr_0 = 5.5124e-04
Loss = 1.4402e-01, PNorm = 70.4589, GNorm = 0.7400, lr_0 = 5.5086e-04
Loss = 1.2969e-01, PNorm = 70.4771, GNorm = 0.9524, lr_0 = 5.5048e-04
Loss = 1.3937e-01, PNorm = 70.4908, GNorm = 0.7489, lr_0 = 5.5011e-04
Loss = 1.3219e-01, PNorm = 70.5029, GNorm = 0.7740, lr_0 = 5.4973e-04
Loss = 1.3742e-01, PNorm = 70.5204, GNorm = 0.5812, lr_0 = 5.4935e-04
Loss = 1.4349e-01, PNorm = 70.5365, GNorm = 0.9232, lr_0 = 5.4898e-04
Loss = 1.6017e-01, PNorm = 70.5600, GNorm = 0.9945, lr_0 = 5.4860e-04
Loss = 1.4394e-01, PNorm = 70.5776, GNorm = 1.1552, lr_0 = 5.4822e-04
Loss = 1.5764e-01, PNorm = 70.6005, GNorm = 0.5363, lr_0 = 5.4785e-04
Loss = 1.5347e-01, PNorm = 70.6196, GNorm = 0.8394, lr_0 = 5.4747e-04
Loss = 1.5174e-01, PNorm = 70.6444, GNorm = 0.8128, lr_0 = 5.4710e-04
Loss = 1.4581e-01, PNorm = 70.6663, GNorm = 0.7607, lr_0 = 5.4672e-04
Loss = 1.3614e-01, PNorm = 70.6833, GNorm = 1.0649, lr_0 = 5.4635e-04
Loss = 1.3654e-01, PNorm = 70.6998, GNorm = 1.3476, lr_0 = 5.4597e-04
Loss = 1.5433e-01, PNorm = 70.7207, GNorm = 0.7588, lr_0 = 5.4560e-04
Loss = 1.3672e-01, PNorm = 70.7405, GNorm = 0.6771, lr_0 = 5.4523e-04
Loss = 1.3231e-01, PNorm = 70.7531, GNorm = 0.7374, lr_0 = 5.4485e-04
Loss = 1.3450e-01, PNorm = 70.7728, GNorm = 0.6783, lr_0 = 5.4448e-04
Loss = 1.3863e-01, PNorm = 70.7851, GNorm = 0.6372, lr_0 = 5.4411e-04
Loss = 1.2556e-01, PNorm = 70.7994, GNorm = 0.6899, lr_0 = 5.4373e-04
Loss = 1.3317e-01, PNorm = 70.8180, GNorm = 0.9454, lr_0 = 5.4336e-04
Loss = 1.2394e-01, PNorm = 70.8392, GNorm = 0.9609, lr_0 = 5.4299e-04
Loss = 1.5561e-01, PNorm = 70.8517, GNorm = 0.6788, lr_0 = 5.4262e-04
Loss = 1.4620e-01, PNorm = 70.8629, GNorm = 1.2249, lr_0 = 5.4225e-04
Loss = 1.7061e-01, PNorm = 70.8767, GNorm = 1.0926, lr_0 = 5.4187e-04
Loss = 1.4327e-01, PNorm = 70.8904, GNorm = 0.9300, lr_0 = 5.4150e-04
Loss = 1.6194e-01, PNorm = 70.9045, GNorm = 0.5968, lr_0 = 5.4113e-04
Loss = 1.4920e-01, PNorm = 70.9305, GNorm = 1.0298, lr_0 = 5.4076e-04
Loss = 1.4938e-01, PNorm = 70.9594, GNorm = 1.0277, lr_0 = 5.4039e-04
Loss = 1.3962e-01, PNorm = 70.9829, GNorm = 0.6074, lr_0 = 5.4002e-04
Loss = 1.4425e-01, PNorm = 70.9986, GNorm = 1.1372, lr_0 = 5.3965e-04
Loss = 1.5467e-01, PNorm = 71.0136, GNorm = 1.1474, lr_0 = 5.3928e-04
Loss = 1.2940e-01, PNorm = 71.0413, GNorm = 0.9390, lr_0 = 5.3891e-04
Loss = 1.3770e-01, PNorm = 71.0544, GNorm = 0.4905, lr_0 = 5.3854e-04
Loss = 1.4907e-01, PNorm = 71.0762, GNorm = 0.9201, lr_0 = 5.3817e-04
Loss = 1.5616e-01, PNorm = 71.0935, GNorm = 1.6102, lr_0 = 5.3781e-04
Loss = 1.4462e-01, PNorm = 71.1059, GNorm = 0.6501, lr_0 = 5.3744e-04
Loss = 1.5352e-01, PNorm = 71.1211, GNorm = 1.0436, lr_0 = 5.3707e-04
Loss = 1.2984e-01, PNorm = 71.1377, GNorm = 1.0162, lr_0 = 5.3670e-04
Loss = 1.3964e-01, PNorm = 71.1602, GNorm = 0.6843, lr_0 = 5.3633e-04
Loss = 1.4399e-01, PNorm = 71.1825, GNorm = 1.0138, lr_0 = 5.3597e-04
Loss = 1.2979e-01, PNorm = 71.2008, GNorm = 0.7818, lr_0 = 5.3560e-04
Loss = 1.5240e-01, PNorm = 71.2147, GNorm = 0.8333, lr_0 = 5.3523e-04
Loss = 1.3619e-01, PNorm = 71.2256, GNorm = 1.2137, lr_0 = 5.3486e-04
Loss = 1.5922e-01, PNorm = 71.2371, GNorm = 0.8404, lr_0 = 5.3450e-04
Loss = 1.3980e-01, PNorm = 71.2555, GNorm = 0.7569, lr_0 = 5.3413e-04
Loss = 1.2328e-01, PNorm = 71.2703, GNorm = 0.8758, lr_0 = 5.3377e-04
Loss = 1.4087e-01, PNorm = 71.2896, GNorm = 1.1424, lr_0 = 5.3340e-04
Loss = 1.4551e-01, PNorm = 71.3141, GNorm = 0.8702, lr_0 = 5.3304e-04
Loss = 1.4035e-01, PNorm = 71.3341, GNorm = 1.0134, lr_0 = 5.3267e-04
Loss = 1.4615e-01, PNorm = 71.3513, GNorm = 0.7323, lr_0 = 5.3231e-04
Loss = 1.4294e-01, PNorm = 71.3696, GNorm = 0.5949, lr_0 = 5.3194e-04
Loss = 1.3872e-01, PNorm = 71.3876, GNorm = 0.7838, lr_0 = 5.3158e-04
Loss = 1.3259e-01, PNorm = 71.4028, GNorm = 0.7886, lr_0 = 5.3121e-04
Loss = 1.4024e-01, PNorm = 71.4141, GNorm = 0.9217, lr_0 = 5.3085e-04
Loss = 1.3609e-01, PNorm = 71.4301, GNorm = 0.7961, lr_0 = 5.3048e-04
Loss = 1.3120e-01, PNorm = 71.4433, GNorm = 0.6682, lr_0 = 5.3012e-04
Loss = 1.5379e-01, PNorm = 71.4585, GNorm = 0.7369, lr_0 = 5.2976e-04
Loss = 1.4876e-01, PNorm = 71.4760, GNorm = 1.1546, lr_0 = 5.2939e-04
Loss = 1.7496e-01, PNorm = 71.4895, GNorm = 0.6694, lr_0 = 5.2903e-04
Loss = 1.4968e-01, PNorm = 71.5110, GNorm = 0.6590, lr_0 = 5.2867e-04
Loss = 1.5223e-01, PNorm = 71.5243, GNorm = 0.6289, lr_0 = 5.2831e-04
Loss = 1.2640e-01, PNorm = 71.5420, GNorm = 1.6255, lr_0 = 5.2795e-04
Loss = 1.3087e-01, PNorm = 71.5612, GNorm = 1.5459, lr_0 = 5.2758e-04
Loss = 1.4514e-01, PNorm = 71.5743, GNorm = 1.0149, lr_0 = 5.2722e-04
Loss = 1.7018e-01, PNorm = 71.5892, GNorm = 0.5725, lr_0 = 5.2686e-04
Loss = 1.3796e-01, PNorm = 71.6072, GNorm = 0.6759, lr_0 = 5.2650e-04
Loss = 1.4768e-01, PNorm = 71.6163, GNorm = 0.8350, lr_0 = 5.2614e-04
Loss = 1.4342e-01, PNorm = 71.6279, GNorm = 0.6683, lr_0 = 5.2578e-04
Loss = 1.6920e-01, PNorm = 71.6431, GNorm = 1.0903, lr_0 = 5.2542e-04
Loss = 1.3883e-01, PNorm = 71.6565, GNorm = 0.5750, lr_0 = 5.2506e-04
Loss = 1.3679e-01, PNorm = 71.6706, GNorm = 1.1775, lr_0 = 5.2470e-04
Loss = 1.4834e-01, PNorm = 71.6860, GNorm = 0.6656, lr_0 = 5.2434e-04
Loss = 1.4760e-01, PNorm = 71.6987, GNorm = 0.8070, lr_0 = 5.2398e-04
Loss = 1.5585e-01, PNorm = 71.7198, GNorm = 0.7693, lr_0 = 5.2362e-04
Loss = 1.5052e-01, PNorm = 71.7402, GNorm = 0.5685, lr_0 = 5.2326e-04
Loss = 1.6346e-01, PNorm = 71.7537, GNorm = 0.7675, lr_0 = 5.2290e-04
Loss = 1.3710e-01, PNorm = 71.7689, GNorm = 0.9661, lr_0 = 5.2255e-04
Loss = 1.5075e-01, PNorm = 71.7786, GNorm = 1.5141, lr_0 = 5.2219e-04
Loss = 1.4279e-01, PNorm = 71.7992, GNorm = 0.6699, lr_0 = 5.2183e-04
Loss = 1.3936e-01, PNorm = 71.8150, GNorm = 0.6962, lr_0 = 5.2147e-04
Loss = 1.2772e-01, PNorm = 71.8322, GNorm = 0.7210, lr_0 = 5.2112e-04
Loss = 1.4863e-01, PNorm = 71.8425, GNorm = 1.3019, lr_0 = 5.2076e-04
Loss = 1.4580e-01, PNorm = 71.8581, GNorm = 0.8428, lr_0 = 5.2040e-04
Loss = 1.3574e-01, PNorm = 71.8682, GNorm = 0.5757, lr_0 = 5.2005e-04
Loss = 1.3424e-01, PNorm = 71.8820, GNorm = 0.5256, lr_0 = 5.1969e-04
Loss = 1.3333e-01, PNorm = 71.9008, GNorm = 0.5470, lr_0 = 5.1933e-04
Loss = 1.4642e-01, PNorm = 71.9162, GNorm = 0.7601, lr_0 = 5.1898e-04
Loss = 1.4245e-01, PNorm = 71.9345, GNorm = 0.7246, lr_0 = 5.1862e-04
Loss = 1.3664e-01, PNorm = 71.9502, GNorm = 0.8749, lr_0 = 5.1827e-04
Loss = 1.5571e-01, PNorm = 71.9597, GNorm = 0.8952, lr_0 = 5.1791e-04
Validation mae = 0.239219
Epoch 10
Loss = 1.3518e-01, PNorm = 71.9737, GNorm = 0.5204, lr_0 = 5.1756e-04
Loss = 1.2250e-01, PNorm = 71.9829, GNorm = 0.6752, lr_0 = 5.1720e-04
Loss = 1.3462e-01, PNorm = 72.0014, GNorm = 0.8581, lr_0 = 5.1685e-04
Loss = 1.4233e-01, PNorm = 72.0207, GNorm = 0.5421, lr_0 = 5.1649e-04
Loss = 1.1955e-01, PNorm = 72.0386, GNorm = 0.6397, lr_0 = 5.1614e-04
Loss = 1.2865e-01, PNorm = 72.0525, GNorm = 0.5537, lr_0 = 5.1579e-04
Loss = 1.3320e-01, PNorm = 72.0663, GNorm = 1.1886, lr_0 = 5.1543e-04
Loss = 1.4722e-01, PNorm = 72.0847, GNorm = 1.4177, lr_0 = 5.1508e-04
Loss = 1.2418e-01, PNorm = 72.1067, GNorm = 0.7518, lr_0 = 5.1473e-04
Loss = 1.2009e-01, PNorm = 72.1274, GNorm = 0.8235, lr_0 = 5.1437e-04
Loss = 1.3826e-01, PNorm = 72.1466, GNorm = 0.7367, lr_0 = 5.1402e-04
Loss = 1.3641e-01, PNorm = 72.1608, GNorm = 0.7458, lr_0 = 5.1367e-04
Loss = 1.3029e-01, PNorm = 72.1674, GNorm = 0.7071, lr_0 = 5.1332e-04
Loss = 1.2329e-01, PNorm = 72.1805, GNorm = 0.7670, lr_0 = 5.1297e-04
Loss = 1.2521e-01, PNorm = 72.1948, GNorm = 1.1807, lr_0 = 5.1262e-04
Loss = 1.5864e-01, PNorm = 72.2139, GNorm = 0.7201, lr_0 = 5.1226e-04
Loss = 1.3463e-01, PNorm = 72.2358, GNorm = 0.6251, lr_0 = 5.1191e-04
Loss = 1.3330e-01, PNorm = 72.2487, GNorm = 0.4972, lr_0 = 5.1156e-04
Loss = 1.2840e-01, PNorm = 72.2691, GNorm = 0.4794, lr_0 = 5.1121e-04
Loss = 1.2955e-01, PNorm = 72.2863, GNorm = 1.3137, lr_0 = 5.1086e-04
Loss = 1.2732e-01, PNorm = 72.3029, GNorm = 0.6467, lr_0 = 5.1051e-04
Loss = 1.2633e-01, PNorm = 72.3167, GNorm = 0.5601, lr_0 = 5.1016e-04
Loss = 1.3534e-01, PNorm = 72.3212, GNorm = 0.6818, lr_0 = 5.0981e-04
Loss = 1.4419e-01, PNorm = 72.3329, GNorm = 0.6574, lr_0 = 5.0946e-04
Loss = 1.4234e-01, PNorm = 72.3466, GNorm = 0.8774, lr_0 = 5.0911e-04
Loss = 1.4961e-01, PNorm = 72.3582, GNorm = 0.6565, lr_0 = 5.0877e-04
Loss = 1.3721e-01, PNorm = 72.3708, GNorm = 0.7525, lr_0 = 5.0842e-04
Loss = 1.2578e-01, PNorm = 72.3811, GNorm = 0.6237, lr_0 = 5.0807e-04
Loss = 1.3680e-01, PNorm = 72.3930, GNorm = 0.5658, lr_0 = 5.0772e-04
Loss = 1.2279e-01, PNorm = 72.4050, GNorm = 0.7407, lr_0 = 5.0737e-04
Loss = 1.3449e-01, PNorm = 72.4186, GNorm = 1.0789, lr_0 = 5.0703e-04
Loss = 1.4459e-01, PNorm = 72.4352, GNorm = 0.7287, lr_0 = 5.0668e-04
Loss = 1.4012e-01, PNorm = 72.4560, GNorm = 0.9849, lr_0 = 5.0633e-04
Loss = 1.1828e-01, PNorm = 72.4770, GNorm = 0.5219, lr_0 = 5.0598e-04
Loss = 1.3505e-01, PNorm = 72.4899, GNorm = 0.7203, lr_0 = 5.0564e-04
Loss = 1.3500e-01, PNorm = 72.5025, GNorm = 1.0865, lr_0 = 5.0529e-04
Loss = 1.2561e-01, PNorm = 72.5162, GNorm = 1.1178, lr_0 = 5.0494e-04
Loss = 1.2789e-01, PNorm = 72.5316, GNorm = 0.9169, lr_0 = 5.0460e-04
Loss = 1.5008e-01, PNorm = 72.5433, GNorm = 0.8171, lr_0 = 5.0425e-04
Loss = 1.4309e-01, PNorm = 72.5582, GNorm = 0.9281, lr_0 = 5.0391e-04
Loss = 1.2804e-01, PNorm = 72.5735, GNorm = 0.8034, lr_0 = 5.0356e-04
Loss = 1.4492e-01, PNorm = 72.5894, GNorm = 0.6234, lr_0 = 5.0322e-04
Loss = 1.2602e-01, PNorm = 72.6129, GNorm = 0.6456, lr_0 = 5.0287e-04
Loss = 1.4287e-01, PNorm = 72.6369, GNorm = 0.7320, lr_0 = 5.0253e-04
Loss = 1.3564e-01, PNorm = 72.6577, GNorm = 0.6402, lr_0 = 5.0218e-04
Loss = 1.3645e-01, PNorm = 72.6692, GNorm = 0.5223, lr_0 = 5.0184e-04
Loss = 1.4574e-01, PNorm = 72.6771, GNorm = 1.0161, lr_0 = 5.0150e-04
Loss = 1.2972e-01, PNorm = 72.6912, GNorm = 0.8816, lr_0 = 5.0115e-04
Loss = 1.4625e-01, PNorm = 72.7057, GNorm = 0.7428, lr_0 = 5.0081e-04
Loss = 1.2928e-01, PNorm = 72.7203, GNorm = 0.4966, lr_0 = 5.0047e-04
Loss = 1.3087e-01, PNorm = 72.7372, GNorm = 1.0995, lr_0 = 5.0012e-04
Loss = 1.4240e-01, PNorm = 72.7521, GNorm = 0.8669, lr_0 = 4.9978e-04
Loss = 1.3604e-01, PNorm = 72.7677, GNorm = 0.8458, lr_0 = 4.9944e-04
Loss = 1.2656e-01, PNorm = 72.7803, GNorm = 0.6747, lr_0 = 4.9910e-04
Loss = 1.3220e-01, PNorm = 72.7939, GNorm = 0.5494, lr_0 = 4.9875e-04
Loss = 1.4113e-01, PNorm = 72.8076, GNorm = 0.7786, lr_0 = 4.9841e-04
Loss = 1.2812e-01, PNorm = 72.8223, GNorm = 1.1878, lr_0 = 4.9807e-04
Loss = 1.3372e-01, PNorm = 72.8374, GNorm = 1.4499, lr_0 = 4.9773e-04
Loss = 1.4884e-01, PNorm = 72.8517, GNorm = 1.4492, lr_0 = 4.9739e-04
Loss = 1.3775e-01, PNorm = 72.8723, GNorm = 0.9757, lr_0 = 4.9705e-04
Loss = 1.4911e-01, PNorm = 72.8888, GNorm = 0.6479, lr_0 = 4.9671e-04
Loss = 1.2669e-01, PNorm = 72.9061, GNorm = 0.6415, lr_0 = 4.9637e-04
Loss = 1.3714e-01, PNorm = 72.9212, GNorm = 0.6437, lr_0 = 4.9603e-04
Loss = 1.5240e-01, PNorm = 72.9367, GNorm = 0.7691, lr_0 = 4.9569e-04
Loss = 1.3434e-01, PNorm = 72.9510, GNorm = 0.6288, lr_0 = 4.9535e-04
Loss = 1.1736e-01, PNorm = 72.9657, GNorm = 0.6055, lr_0 = 4.9501e-04
Loss = 1.4142e-01, PNorm = 72.9839, GNorm = 0.6428, lr_0 = 4.9467e-04
Loss = 1.2136e-01, PNorm = 73.0024, GNorm = 0.5906, lr_0 = 4.9433e-04
Loss = 1.3758e-01, PNorm = 73.0150, GNorm = 0.4963, lr_0 = 4.9399e-04
Loss = 1.4352e-01, PNorm = 73.0269, GNorm = 0.6602, lr_0 = 4.9365e-04
Loss = 1.3176e-01, PNorm = 73.0391, GNorm = 0.6039, lr_0 = 4.9332e-04
Loss = 1.3327e-01, PNorm = 73.0523, GNorm = 0.6119, lr_0 = 4.9298e-04
Loss = 1.3926e-01, PNorm = 73.0714, GNorm = 0.7673, lr_0 = 4.9264e-04
Loss = 1.2518e-01, PNorm = 73.0785, GNorm = 0.8395, lr_0 = 4.9230e-04
Loss = 1.3885e-01, PNorm = 73.0942, GNorm = 0.8029, lr_0 = 4.9197e-04
Loss = 1.2325e-01, PNorm = 73.1067, GNorm = 0.7412, lr_0 = 4.9163e-04
Loss = 1.4973e-01, PNorm = 73.1162, GNorm = 1.0129, lr_0 = 4.9129e-04
Loss = 1.6298e-01, PNorm = 73.1300, GNorm = 1.2174, lr_0 = 4.9095e-04
Loss = 1.4355e-01, PNorm = 73.1446, GNorm = 0.6803, lr_0 = 4.9062e-04
Loss = 1.3781e-01, PNorm = 73.1594, GNorm = 1.0923, lr_0 = 4.9028e-04
Loss = 1.3862e-01, PNorm = 73.1744, GNorm = 0.9725, lr_0 = 4.8995e-04
Loss = 1.4304e-01, PNorm = 73.1915, GNorm = 0.6537, lr_0 = 4.8961e-04
Loss = 1.5645e-01, PNorm = 73.2105, GNorm = 1.2084, lr_0 = 4.8928e-04
Loss = 1.4024e-01, PNorm = 73.2279, GNorm = 0.8368, lr_0 = 4.8894e-04
Loss = 1.2636e-01, PNorm = 73.2393, GNorm = 0.6837, lr_0 = 4.8861e-04
Loss = 1.4226e-01, PNorm = 73.2525, GNorm = 0.8473, lr_0 = 4.8827e-04
Loss = 1.3806e-01, PNorm = 73.2633, GNorm = 0.8550, lr_0 = 4.8794e-04
Loss = 1.3052e-01, PNorm = 73.2788, GNorm = 1.1837, lr_0 = 4.8760e-04
Loss = 1.3284e-01, PNorm = 73.2948, GNorm = 1.0639, lr_0 = 4.8727e-04
Loss = 1.5362e-01, PNorm = 73.3118, GNorm = 0.6890, lr_0 = 4.8693e-04
Loss = 1.5250e-01, PNorm = 73.3252, GNorm = 1.0763, lr_0 = 4.8660e-04
Loss = 1.2694e-01, PNorm = 73.3371, GNorm = 0.5790, lr_0 = 4.8627e-04
Loss = 1.3796e-01, PNorm = 73.3540, GNorm = 0.9709, lr_0 = 4.8593e-04
Loss = 1.4536e-01, PNorm = 73.3678, GNorm = 0.8742, lr_0 = 4.8560e-04
Loss = 1.2808e-01, PNorm = 73.3795, GNorm = 0.6314, lr_0 = 4.8527e-04
Loss = 1.1908e-01, PNorm = 73.3908, GNorm = 1.2331, lr_0 = 4.8494e-04
Loss = 1.2289e-01, PNorm = 73.3933, GNorm = 0.6406, lr_0 = 4.8460e-04
Loss = 1.4322e-01, PNorm = 73.3998, GNorm = 0.8713, lr_0 = 4.8427e-04
Loss = 1.3505e-01, PNorm = 73.4175, GNorm = 0.5985, lr_0 = 4.8394e-04
Loss = 1.2615e-01, PNorm = 73.4252, GNorm = 0.5262, lr_0 = 4.8361e-04
Loss = 1.2739e-01, PNorm = 73.4368, GNorm = 0.5787, lr_0 = 4.8328e-04
Loss = 1.2985e-01, PNorm = 73.4582, GNorm = 0.8403, lr_0 = 4.8295e-04
Loss = 1.4338e-01, PNorm = 73.4731, GNorm = 0.7590, lr_0 = 4.8262e-04
Loss = 1.2489e-01, PNorm = 73.4887, GNorm = 0.5989, lr_0 = 4.8228e-04
Loss = 1.3679e-01, PNorm = 73.4984, GNorm = 1.3325, lr_0 = 4.8195e-04
Loss = 1.1631e-01, PNorm = 73.5092, GNorm = 0.6972, lr_0 = 4.8162e-04
Loss = 1.3201e-01, PNorm = 73.5209, GNorm = 0.5971, lr_0 = 4.8129e-04
Loss = 1.3494e-01, PNorm = 73.5326, GNorm = 1.0615, lr_0 = 4.8096e-04
Loss = 1.4734e-01, PNorm = 73.5439, GNorm = 0.6459, lr_0 = 4.8064e-04
Loss = 1.4962e-01, PNorm = 73.5585, GNorm = 0.9233, lr_0 = 4.8031e-04
Loss = 1.2574e-01, PNorm = 73.5760, GNorm = 0.7445, lr_0 = 4.7998e-04
Loss = 1.4733e-01, PNorm = 73.5943, GNorm = 0.5063, lr_0 = 4.7965e-04
Loss = 1.4034e-01, PNorm = 73.6166, GNorm = 0.6737, lr_0 = 4.7932e-04
Loss = 1.5505e-01, PNorm = 73.6258, GNorm = 1.5225, lr_0 = 4.7899e-04
Loss = 1.5837e-01, PNorm = 73.6414, GNorm = 0.8568, lr_0 = 4.7866e-04
Loss = 1.5122e-01, PNorm = 73.6589, GNorm = 0.7330, lr_0 = 4.7833e-04
Loss = 1.4500e-01, PNorm = 73.6744, GNorm = 1.3438, lr_0 = 4.7801e-04
Loss = 1.2441e-01, PNorm = 73.6845, GNorm = 0.5048, lr_0 = 4.7768e-04
Loss = 1.2467e-01, PNorm = 73.6944, GNorm = 0.6647, lr_0 = 4.7735e-04
Loss = 1.2820e-01, PNorm = 73.7045, GNorm = 0.9292, lr_0 = 4.7703e-04
Validation mae = 0.234032
Epoch 11
Loss = 1.2323e-01, PNorm = 73.7174, GNorm = 0.6485, lr_0 = 4.7670e-04
Loss = 1.2979e-01, PNorm = 73.7281, GNorm = 0.5428, lr_0 = 4.7637e-04
Loss = 1.2039e-01, PNorm = 73.7398, GNorm = 0.6534, lr_0 = 4.7605e-04
Loss = 1.4261e-01, PNorm = 73.7555, GNorm = 0.6169, lr_0 = 4.7572e-04
Loss = 1.1339e-01, PNorm = 73.7734, GNorm = 0.4646, lr_0 = 4.7539e-04
Loss = 1.1558e-01, PNorm = 73.7859, GNorm = 0.4267, lr_0 = 4.7507e-04
Loss = 1.1226e-01, PNorm = 73.7987, GNorm = 0.5548, lr_0 = 4.7474e-04
Loss = 1.2191e-01, PNorm = 73.8143, GNorm = 0.6157, lr_0 = 4.7442e-04
Loss = 9.8089e-02, PNorm = 73.8248, GNorm = 0.5920, lr_0 = 4.7409e-04
Loss = 1.2427e-01, PNorm = 73.8354, GNorm = 0.9178, lr_0 = 4.7377e-04
Loss = 1.1583e-01, PNorm = 73.8463, GNorm = 0.8468, lr_0 = 4.7344e-04
Loss = 1.2765e-01, PNorm = 73.8651, GNorm = 0.5896, lr_0 = 4.7312e-04
Loss = 1.1636e-01, PNorm = 73.8758, GNorm = 0.8129, lr_0 = 4.7279e-04
Loss = 1.2734e-01, PNorm = 73.8873, GNorm = 0.6234, lr_0 = 4.7247e-04
Loss = 1.4272e-01, PNorm = 73.9007, GNorm = 1.1440, lr_0 = 4.7215e-04
Loss = 1.3398e-01, PNorm = 73.9192, GNorm = 0.7837, lr_0 = 4.7182e-04
Loss = 1.2165e-01, PNorm = 73.9341, GNorm = 0.5676, lr_0 = 4.7150e-04
Loss = 1.2718e-01, PNorm = 73.9436, GNorm = 0.6330, lr_0 = 4.7118e-04
Loss = 1.3982e-01, PNorm = 73.9556, GNorm = 0.8111, lr_0 = 4.7085e-04
Loss = 1.3348e-01, PNorm = 73.9697, GNorm = 1.9829, lr_0 = 4.7053e-04
Loss = 1.1751e-01, PNorm = 73.9836, GNorm = 0.7093, lr_0 = 4.7021e-04
Loss = 1.6248e-01, PNorm = 74.0032, GNorm = 0.9957, lr_0 = 4.6989e-04
Loss = 1.3237e-01, PNorm = 74.0191, GNorm = 0.7131, lr_0 = 4.6957e-04
Loss = 1.0786e-01, PNorm = 74.0303, GNorm = 0.9860, lr_0 = 4.6924e-04
Loss = 1.2711e-01, PNorm = 74.0392, GNorm = 0.7395, lr_0 = 4.6892e-04
Loss = 1.2157e-01, PNorm = 74.0518, GNorm = 0.8774, lr_0 = 4.6860e-04
Loss = 1.2528e-01, PNorm = 74.0624, GNorm = 0.5118, lr_0 = 4.6828e-04
Loss = 1.3453e-01, PNorm = 74.0791, GNorm = 0.6893, lr_0 = 4.6796e-04
Loss = 1.1722e-01, PNorm = 74.0968, GNorm = 0.7517, lr_0 = 4.6764e-04
Loss = 1.4469e-01, PNorm = 74.1164, GNorm = 1.1915, lr_0 = 4.6732e-04
Loss = 1.5045e-01, PNorm = 74.1410, GNorm = 0.7603, lr_0 = 4.6700e-04
Loss = 1.4213e-01, PNorm = 74.1505, GNorm = 0.8892, lr_0 = 4.6668e-04
Loss = 1.2975e-01, PNorm = 74.1650, GNorm = 0.4982, lr_0 = 4.6636e-04
Loss = 1.3650e-01, PNorm = 74.1719, GNorm = 0.8894, lr_0 = 4.6604e-04
Loss = 1.3553e-01, PNorm = 74.1807, GNorm = 0.8249, lr_0 = 4.6572e-04
Loss = 1.1972e-01, PNorm = 74.1933, GNorm = 0.4721, lr_0 = 4.6540e-04
Loss = 1.3654e-01, PNorm = 74.2083, GNorm = 0.7726, lr_0 = 4.6508e-04
Loss = 1.2570e-01, PNorm = 74.2213, GNorm = 0.7158, lr_0 = 4.6476e-04
Loss = 1.4327e-01, PNorm = 74.2370, GNorm = 0.6232, lr_0 = 4.6445e-04
Loss = 1.3693e-01, PNorm = 74.2579, GNorm = 0.7121, lr_0 = 4.6413e-04
Loss = 1.0969e-01, PNorm = 74.2691, GNorm = 0.5876, lr_0 = 4.6381e-04
Loss = 1.4894e-01, PNorm = 74.2796, GNorm = 0.4990, lr_0 = 4.6349e-04
Loss = 1.2389e-01, PNorm = 74.2889, GNorm = 0.7351, lr_0 = 4.6317e-04
Loss = 1.2328e-01, PNorm = 74.2945, GNorm = 1.2690, lr_0 = 4.6286e-04
Loss = 1.4598e-01, PNorm = 74.3128, GNorm = 0.6133, lr_0 = 4.6254e-04
Loss = 1.3073e-01, PNorm = 74.3281, GNorm = 0.9141, lr_0 = 4.6222e-04
Loss = 1.2553e-01, PNorm = 74.3419, GNorm = 0.8233, lr_0 = 4.6191e-04
Loss = 1.3053e-01, PNorm = 74.3585, GNorm = 0.7657, lr_0 = 4.6159e-04
Loss = 1.1085e-01, PNorm = 74.3708, GNorm = 0.7631, lr_0 = 4.6127e-04
Loss = 1.2033e-01, PNorm = 74.3883, GNorm = 0.4995, lr_0 = 4.6096e-04
Loss = 1.1642e-01, PNorm = 74.3977, GNorm = 0.5161, lr_0 = 4.6064e-04
Loss = 1.5029e-01, PNorm = 74.4104, GNorm = 0.5974, lr_0 = 4.6033e-04
Loss = 1.2002e-01, PNorm = 74.4255, GNorm = 0.4784, lr_0 = 4.6001e-04
Loss = 1.3754e-01, PNorm = 74.4339, GNorm = 1.2353, lr_0 = 4.5970e-04
Loss = 1.2688e-01, PNorm = 74.4465, GNorm = 0.6837, lr_0 = 4.5938e-04
Loss = 1.2067e-01, PNorm = 74.4613, GNorm = 0.8412, lr_0 = 4.5907e-04
Loss = 1.2881e-01, PNorm = 74.4748, GNorm = 0.8767, lr_0 = 4.5875e-04
Loss = 1.1421e-01, PNorm = 74.4877, GNorm = 0.7487, lr_0 = 4.5844e-04
Loss = 1.1944e-01, PNorm = 74.4964, GNorm = 0.6452, lr_0 = 4.5812e-04
Loss = 1.1922e-01, PNorm = 74.5058, GNorm = 0.8208, lr_0 = 4.5781e-04
Loss = 1.1320e-01, PNorm = 74.5191, GNorm = 0.5289, lr_0 = 4.5750e-04
Loss = 1.3691e-01, PNorm = 74.5328, GNorm = 0.6325, lr_0 = 4.5718e-04
Loss = 1.2324e-01, PNorm = 74.5447, GNorm = 0.7202, lr_0 = 4.5687e-04
Loss = 1.1266e-01, PNorm = 74.5564, GNorm = 0.7845, lr_0 = 4.5656e-04
Loss = 1.3328e-01, PNorm = 74.5652, GNorm = 0.5914, lr_0 = 4.5624e-04
Loss = 1.2502e-01, PNorm = 74.5739, GNorm = 0.6640, lr_0 = 4.5593e-04
Loss = 1.4041e-01, PNorm = 74.5872, GNorm = 1.0923, lr_0 = 4.5562e-04
Loss = 1.2722e-01, PNorm = 74.6045, GNorm = 0.8011, lr_0 = 4.5531e-04
Loss = 1.2074e-01, PNorm = 74.6136, GNorm = 0.8144, lr_0 = 4.5499e-04
Loss = 1.2363e-01, PNorm = 74.6246, GNorm = 0.5017, lr_0 = 4.5468e-04
Loss = 1.2401e-01, PNorm = 74.6388, GNorm = 0.6559, lr_0 = 4.5437e-04
Loss = 1.2373e-01, PNorm = 74.6486, GNorm = 1.4165, lr_0 = 4.5406e-04
Loss = 1.3631e-01, PNorm = 74.6553, GNorm = 1.2063, lr_0 = 4.5375e-04
Loss = 1.3391e-01, PNorm = 74.6639, GNorm = 0.6537, lr_0 = 4.5344e-04
Loss = 1.3000e-01, PNorm = 74.6714, GNorm = 0.6722, lr_0 = 4.5313e-04
Loss = 1.1128e-01, PNorm = 74.6808, GNorm = 0.5176, lr_0 = 4.5282e-04
Loss = 1.3824e-01, PNorm = 74.6924, GNorm = 0.5337, lr_0 = 4.5251e-04
Loss = 1.3910e-01, PNorm = 74.7081, GNorm = 0.8547, lr_0 = 4.5220e-04
Loss = 1.0980e-01, PNorm = 74.7258, GNorm = 0.8223, lr_0 = 4.5189e-04
Loss = 1.1263e-01, PNorm = 74.7400, GNorm = 0.6408, lr_0 = 4.5158e-04
Loss = 1.0558e-01, PNorm = 74.7463, GNorm = 0.7407, lr_0 = 4.5127e-04
Loss = 1.3559e-01, PNorm = 74.7575, GNorm = 0.5737, lr_0 = 4.5096e-04
Loss = 1.3185e-01, PNorm = 74.7680, GNorm = 0.8422, lr_0 = 4.5065e-04
Loss = 1.2871e-01, PNorm = 74.7805, GNorm = 0.5588, lr_0 = 4.5034e-04
Loss = 1.3160e-01, PNorm = 74.7971, GNorm = 0.8555, lr_0 = 4.5003e-04
Loss = 1.2998e-01, PNorm = 74.8063, GNorm = 1.3804, lr_0 = 4.4972e-04
Loss = 1.2837e-01, PNorm = 74.8187, GNorm = 0.7812, lr_0 = 4.4942e-04
Loss = 1.1825e-01, PNorm = 74.8261, GNorm = 0.6696, lr_0 = 4.4911e-04
Loss = 1.5301e-01, PNorm = 74.8353, GNorm = 1.1360, lr_0 = 4.4880e-04
Loss = 1.3328e-01, PNorm = 74.8573, GNorm = 1.1729, lr_0 = 4.4849e-04
Loss = 1.4945e-01, PNorm = 74.8771, GNorm = 1.0349, lr_0 = 4.4819e-04
Loss = 1.1912e-01, PNorm = 74.8924, GNorm = 0.5347, lr_0 = 4.4788e-04
Loss = 1.1721e-01, PNorm = 74.9079, GNorm = 1.4392, lr_0 = 4.4757e-04
Loss = 1.2277e-01, PNorm = 74.9216, GNorm = 0.8137, lr_0 = 4.4727e-04
Loss = 1.1357e-01, PNorm = 74.9312, GNorm = 0.6732, lr_0 = 4.4696e-04
Loss = 1.4950e-01, PNorm = 74.9431, GNorm = 0.5762, lr_0 = 4.4665e-04
Loss = 1.3025e-01, PNorm = 74.9604, GNorm = 0.6209, lr_0 = 4.4635e-04
Loss = 1.1939e-01, PNorm = 74.9722, GNorm = 0.6445, lr_0 = 4.4604e-04
Loss = 1.2004e-01, PNorm = 74.9843, GNorm = 0.7763, lr_0 = 4.4574e-04
Loss = 1.2632e-01, PNorm = 74.9934, GNorm = 0.6281, lr_0 = 4.4543e-04
Loss = 1.3285e-01, PNorm = 75.0079, GNorm = 0.6140, lr_0 = 4.4513e-04
Loss = 1.2168e-01, PNorm = 75.0204, GNorm = 0.6665, lr_0 = 4.4482e-04
Loss = 1.2956e-01, PNorm = 75.0325, GNorm = 0.5496, lr_0 = 4.4452e-04
Loss = 1.3327e-01, PNorm = 75.0485, GNorm = 0.6652, lr_0 = 4.4421e-04
Loss = 1.2659e-01, PNorm = 75.0585, GNorm = 0.7302, lr_0 = 4.4391e-04
Loss = 1.3418e-01, PNorm = 75.0683, GNorm = 0.7460, lr_0 = 4.4360e-04
Loss = 1.2791e-01, PNorm = 75.0771, GNorm = 0.7406, lr_0 = 4.4330e-04
Loss = 1.2637e-01, PNorm = 75.0849, GNorm = 0.7045, lr_0 = 4.4299e-04
Loss = 1.0741e-01, PNorm = 75.0982, GNorm = 0.6092, lr_0 = 4.4269e-04
Loss = 1.1707e-01, PNorm = 75.1118, GNorm = 0.6761, lr_0 = 4.4239e-04
Loss = 1.4093e-01, PNorm = 75.1164, GNorm = 0.8584, lr_0 = 4.4209e-04
Loss = 1.3987e-01, PNorm = 75.1307, GNorm = 1.1054, lr_0 = 4.4178e-04
Loss = 1.5165e-01, PNorm = 75.1390, GNorm = 0.8070, lr_0 = 4.4148e-04
Loss = 1.3372e-01, PNorm = 75.1508, GNorm = 0.5811, lr_0 = 4.4118e-04
Loss = 1.3466e-01, PNorm = 75.1635, GNorm = 0.7773, lr_0 = 4.4088e-04
Loss = 1.2512e-01, PNorm = 75.1762, GNorm = 0.5799, lr_0 = 4.4057e-04
Loss = 1.4871e-01, PNorm = 75.1893, GNorm = 0.7239, lr_0 = 4.4027e-04
Loss = 1.3731e-01, PNorm = 75.2062, GNorm = 0.7933, lr_0 = 4.3997e-04
Loss = 1.2968e-01, PNorm = 75.2211, GNorm = 0.7818, lr_0 = 4.3967e-04
Loss = 1.2819e-01, PNorm = 75.2329, GNorm = 0.6217, lr_0 = 4.3937e-04
Validation mae = 0.233961
Epoch 12
Loss = 1.2327e-01, PNorm = 75.2453, GNorm = 0.8317, lr_0 = 4.3907e-04
Loss = 1.2170e-01, PNorm = 75.2602, GNorm = 0.9015, lr_0 = 4.3877e-04
Loss = 1.0911e-01, PNorm = 75.2761, GNorm = 0.6235, lr_0 = 4.3846e-04
Loss = 1.2915e-01, PNorm = 75.2846, GNorm = 1.3867, lr_0 = 4.3816e-04
Loss = 1.2166e-01, PNorm = 75.3029, GNorm = 0.8053, lr_0 = 4.3786e-04
Loss = 1.1764e-01, PNorm = 75.3109, GNorm = 0.6692, lr_0 = 4.3756e-04
Loss = 1.1778e-01, PNorm = 75.3205, GNorm = 0.6426, lr_0 = 4.3726e-04
Loss = 1.1211e-01, PNorm = 75.3344, GNorm = 0.7634, lr_0 = 4.3696e-04
Loss = 1.0491e-01, PNorm = 75.3486, GNorm = 0.6546, lr_0 = 4.3667e-04
Loss = 1.3144e-01, PNorm = 75.3544, GNorm = 0.5422, lr_0 = 4.3637e-04
Loss = 1.2727e-01, PNorm = 75.3665, GNorm = 0.9546, lr_0 = 4.3607e-04
Loss = 1.2893e-01, PNorm = 75.3758, GNorm = 1.0564, lr_0 = 4.3577e-04
Loss = 1.1310e-01, PNorm = 75.3897, GNorm = 0.9048, lr_0 = 4.3547e-04
Loss = 1.1581e-01, PNorm = 75.3981, GNorm = 0.7545, lr_0 = 4.3517e-04
Loss = 1.1415e-01, PNorm = 75.4095, GNorm = 0.5115, lr_0 = 4.3487e-04
Loss = 1.3018e-01, PNorm = 75.4226, GNorm = 0.7846, lr_0 = 4.3458e-04
Loss = 1.2084e-01, PNorm = 75.4372, GNorm = 0.7499, lr_0 = 4.3428e-04
Loss = 1.1444e-01, PNorm = 75.4490, GNorm = 1.0923, lr_0 = 4.3398e-04
Loss = 1.2986e-01, PNorm = 75.4555, GNorm = 1.2397, lr_0 = 4.3368e-04
Loss = 1.2507e-01, PNorm = 75.4701, GNorm = 0.7587, lr_0 = 4.3339e-04
Loss = 1.1431e-01, PNorm = 75.4795, GNorm = 0.5139, lr_0 = 4.3309e-04
Loss = 1.2572e-01, PNorm = 75.4908, GNorm = 0.5757, lr_0 = 4.3279e-04
Loss = 1.2534e-01, PNorm = 75.5017, GNorm = 0.7281, lr_0 = 4.3250e-04
Loss = 1.0748e-01, PNorm = 75.5169, GNorm = 0.4924, lr_0 = 4.3220e-04
Loss = 1.2102e-01, PNorm = 75.5294, GNorm = 0.5877, lr_0 = 4.3190e-04
Loss = 1.1646e-01, PNorm = 75.5383, GNorm = 0.5818, lr_0 = 4.3161e-04
Loss = 1.2943e-01, PNorm = 75.5506, GNorm = 0.8561, lr_0 = 4.3131e-04
Loss = 1.2087e-01, PNorm = 75.5653, GNorm = 0.5913, lr_0 = 4.3102e-04
Loss = 1.1964e-01, PNorm = 75.5832, GNorm = 0.8532, lr_0 = 4.3072e-04
Loss = 1.1477e-01, PNorm = 75.5973, GNorm = 0.6685, lr_0 = 4.3043e-04
Loss = 1.5151e-01, PNorm = 75.6097, GNorm = 1.1120, lr_0 = 4.3013e-04
Loss = 1.3113e-01, PNorm = 75.6237, GNorm = 0.5174, lr_0 = 4.2984e-04
Loss = 1.1835e-01, PNorm = 75.6346, GNorm = 0.6415, lr_0 = 4.2954e-04
Loss = 1.0942e-01, PNorm = 75.6456, GNorm = 0.4992, lr_0 = 4.2925e-04
Loss = 1.2709e-01, PNorm = 75.6563, GNorm = 0.7244, lr_0 = 4.2895e-04
Loss = 1.2621e-01, PNorm = 75.6712, GNorm = 0.4773, lr_0 = 4.2866e-04
Loss = 1.3102e-01, PNorm = 75.6821, GNorm = 0.7564, lr_0 = 4.2837e-04
Loss = 1.4048e-01, PNorm = 75.6924, GNorm = 0.5300, lr_0 = 4.2807e-04
Loss = 1.3782e-01, PNorm = 75.6985, GNorm = 0.5875, lr_0 = 4.2778e-04
Loss = 1.1863e-01, PNorm = 75.7049, GNorm = 0.6704, lr_0 = 4.2749e-04
Loss = 1.2125e-01, PNorm = 75.7171, GNorm = 0.8064, lr_0 = 4.2719e-04
Loss = 1.3284e-01, PNorm = 75.7332, GNorm = 0.6304, lr_0 = 4.2690e-04
Loss = 1.2441e-01, PNorm = 75.7466, GNorm = 0.7769, lr_0 = 4.2661e-04
Loss = 1.2096e-01, PNorm = 75.7561, GNorm = 0.5909, lr_0 = 4.2632e-04
Loss = 1.0909e-01, PNorm = 75.7698, GNorm = 0.6670, lr_0 = 4.2602e-04
Loss = 1.2285e-01, PNorm = 75.7828, GNorm = 0.6553, lr_0 = 4.2573e-04
Loss = 1.2469e-01, PNorm = 75.7968, GNorm = 0.6447, lr_0 = 4.2544e-04
Loss = 1.1686e-01, PNorm = 75.8132, GNorm = 0.7657, lr_0 = 4.2515e-04
Loss = 1.2894e-01, PNorm = 75.8260, GNorm = 0.8458, lr_0 = 4.2486e-04
Loss = 1.0662e-01, PNorm = 75.8373, GNorm = 0.7030, lr_0 = 4.2457e-04
Loss = 1.2771e-01, PNorm = 75.8452, GNorm = 0.5948, lr_0 = 4.2428e-04
Loss = 1.2063e-01, PNorm = 75.8558, GNorm = 0.6401, lr_0 = 4.2399e-04
Loss = 1.1643e-01, PNorm = 75.8656, GNorm = 0.6702, lr_0 = 4.2370e-04
Loss = 1.1791e-01, PNorm = 75.8755, GNorm = 1.1642, lr_0 = 4.2340e-04
Loss = 1.1607e-01, PNorm = 75.8857, GNorm = 0.6969, lr_0 = 4.2311e-04
Loss = 1.2607e-01, PNorm = 75.9043, GNorm = 0.7798, lr_0 = 4.2283e-04
Loss = 1.4453e-01, PNorm = 75.9136, GNorm = 0.8933, lr_0 = 4.2254e-04
Loss = 1.4220e-01, PNorm = 75.9283, GNorm = 0.9053, lr_0 = 4.2225e-04
Loss = 1.2181e-01, PNorm = 75.9403, GNorm = 0.5995, lr_0 = 4.2196e-04
Loss = 1.1885e-01, PNorm = 75.9508, GNorm = 0.6356, lr_0 = 4.2167e-04
Loss = 1.3121e-01, PNorm = 75.9647, GNorm = 1.0465, lr_0 = 4.2138e-04
Loss = 1.1591e-01, PNorm = 75.9781, GNorm = 0.6769, lr_0 = 4.2109e-04
Loss = 1.1674e-01, PNorm = 75.9887, GNorm = 0.9025, lr_0 = 4.2080e-04
Loss = 1.2633e-01, PNorm = 75.9995, GNorm = 0.7004, lr_0 = 4.2051e-04
Loss = 1.1614e-01, PNorm = 76.0121, GNorm = 1.3593, lr_0 = 4.2023e-04
Loss = 1.3480e-01, PNorm = 76.0183, GNorm = 0.6687, lr_0 = 4.1994e-04
Loss = 1.2983e-01, PNorm = 76.0308, GNorm = 1.1664, lr_0 = 4.1965e-04
Loss = 1.2610e-01, PNorm = 76.0398, GNorm = 0.6115, lr_0 = 4.1936e-04
Loss = 1.3288e-01, PNorm = 76.0472, GNorm = 0.7501, lr_0 = 4.1907e-04
Loss = 1.1649e-01, PNorm = 76.0585, GNorm = 0.6758, lr_0 = 4.1879e-04
Loss = 1.1876e-01, PNorm = 76.0672, GNorm = 0.6717, lr_0 = 4.1850e-04
Loss = 1.1460e-01, PNorm = 76.0793, GNorm = 0.8334, lr_0 = 4.1821e-04
Loss = 1.2810e-01, PNorm = 76.0875, GNorm = 0.6943, lr_0 = 4.1793e-04
Loss = 1.0750e-01, PNorm = 76.0961, GNorm = 0.5991, lr_0 = 4.1764e-04
Loss = 1.1239e-01, PNorm = 76.1083, GNorm = 0.5438, lr_0 = 4.1736e-04
Loss = 1.0982e-01, PNorm = 76.1202, GNorm = 0.8023, lr_0 = 4.1707e-04
Loss = 1.1990e-01, PNorm = 76.1318, GNorm = 0.6149, lr_0 = 4.1678e-04
Loss = 1.5305e-01, PNorm = 76.1443, GNorm = 0.9584, lr_0 = 4.1650e-04
Loss = 1.2877e-01, PNorm = 76.1589, GNorm = 0.7724, lr_0 = 4.1621e-04
Loss = 1.2570e-01, PNorm = 76.1707, GNorm = 0.6788, lr_0 = 4.1593e-04
Loss = 1.3116e-01, PNorm = 76.1800, GNorm = 0.6414, lr_0 = 4.1564e-04
Loss = 1.2677e-01, PNorm = 76.1939, GNorm = 0.7850, lr_0 = 4.1536e-04
Loss = 1.3296e-01, PNorm = 76.2001, GNorm = 0.8268, lr_0 = 4.1507e-04
Loss = 1.2915e-01, PNorm = 76.2142, GNorm = 0.4645, lr_0 = 4.1479e-04
Loss = 1.1446e-01, PNorm = 76.2227, GNorm = 0.9433, lr_0 = 4.1450e-04
Loss = 1.2438e-01, PNorm = 76.2282, GNorm = 0.6374, lr_0 = 4.1422e-04
Loss = 1.2649e-01, PNorm = 76.2386, GNorm = 0.6921, lr_0 = 4.1394e-04
Loss = 1.1769e-01, PNorm = 76.2513, GNorm = 0.7634, lr_0 = 4.1365e-04
Loss = 1.2114e-01, PNorm = 76.2600, GNorm = 0.8361, lr_0 = 4.1337e-04
Loss = 1.2666e-01, PNorm = 76.2740, GNorm = 0.7279, lr_0 = 4.1309e-04
Loss = 1.2101e-01, PNorm = 76.2859, GNorm = 0.8811, lr_0 = 4.1280e-04
Loss = 1.2940e-01, PNorm = 76.2997, GNorm = 0.5690, lr_0 = 4.1252e-04
Loss = 1.3686e-01, PNorm = 76.3129, GNorm = 0.8152, lr_0 = 4.1224e-04
Loss = 1.1508e-01, PNorm = 76.3241, GNorm = 0.8855, lr_0 = 4.1196e-04
Loss = 1.1834e-01, PNorm = 76.3381, GNorm = 0.6649, lr_0 = 4.1167e-04
Loss = 1.3260e-01, PNorm = 76.3519, GNorm = 0.8247, lr_0 = 4.1139e-04
Loss = 1.3832e-01, PNorm = 76.3660, GNorm = 0.6305, lr_0 = 4.1111e-04
Loss = 1.1156e-01, PNorm = 76.3791, GNorm = 0.6612, lr_0 = 4.1083e-04
Loss = 1.1171e-01, PNorm = 76.3873, GNorm = 0.6957, lr_0 = 4.1055e-04
Loss = 1.2769e-01, PNorm = 76.3935, GNorm = 0.6760, lr_0 = 4.1027e-04
Loss = 1.1125e-01, PNorm = 76.4046, GNorm = 0.5229, lr_0 = 4.0998e-04
Loss = 1.2527e-01, PNorm = 76.4150, GNorm = 0.6959, lr_0 = 4.0970e-04
Loss = 1.1119e-01, PNorm = 76.4275, GNorm = 0.6381, lr_0 = 4.0942e-04
Loss = 1.1346e-01, PNorm = 76.4394, GNorm = 0.5901, lr_0 = 4.0914e-04
Loss = 1.2257e-01, PNorm = 76.4450, GNorm = 0.5672, lr_0 = 4.0886e-04
Loss = 1.0479e-01, PNorm = 76.4530, GNorm = 0.6599, lr_0 = 4.0858e-04
Loss = 1.2075e-01, PNorm = 76.4590, GNorm = 1.0539, lr_0 = 4.0830e-04
Loss = 1.0299e-01, PNorm = 76.4720, GNorm = 0.7318, lr_0 = 4.0802e-04
Loss = 1.2727e-01, PNorm = 76.4829, GNorm = 0.7570, lr_0 = 4.0774e-04
Loss = 1.2332e-01, PNorm = 76.4873, GNorm = 0.9829, lr_0 = 4.0746e-04
Loss = 1.1645e-01, PNorm = 76.4998, GNorm = 0.5802, lr_0 = 4.0718e-04
Loss = 1.2552e-01, PNorm = 76.5040, GNorm = 0.5266, lr_0 = 4.0691e-04
Loss = 1.3307e-01, PNorm = 76.5090, GNorm = 0.6207, lr_0 = 4.0663e-04
Loss = 1.1840e-01, PNorm = 76.5139, GNorm = 0.5799, lr_0 = 4.0635e-04
Loss = 1.2596e-01, PNorm = 76.5183, GNorm = 0.5422, lr_0 = 4.0607e-04
Loss = 1.3159e-01, PNorm = 76.5284, GNorm = 0.7277, lr_0 = 4.0579e-04
Loss = 1.2745e-01, PNorm = 76.5393, GNorm = 1.3773, lr_0 = 4.0551e-04
Loss = 1.2018e-01, PNorm = 76.5479, GNorm = 0.5786, lr_0 = 4.0524e-04
Loss = 1.2184e-01, PNorm = 76.5614, GNorm = 0.7620, lr_0 = 4.0496e-04
Loss = 1.4141e-01, PNorm = 76.5713, GNorm = 0.7076, lr_0 = 4.0468e-04
Validation mae = 0.234451
Epoch 13
Loss = 1.1990e-01, PNorm = 76.5848, GNorm = 0.6103, lr_0 = 4.0440e-04
Loss = 1.2288e-01, PNorm = 76.5975, GNorm = 0.6175, lr_0 = 4.0413e-04
Loss = 1.0597e-01, PNorm = 76.6084, GNorm = 0.7736, lr_0 = 4.0385e-04
Loss = 1.3440e-01, PNorm = 76.6188, GNorm = 0.5973, lr_0 = 4.0357e-04
Loss = 1.1218e-01, PNorm = 76.6313, GNorm = 0.7146, lr_0 = 4.0330e-04
Loss = 1.0485e-01, PNorm = 76.6377, GNorm = 0.6950, lr_0 = 4.0302e-04
Loss = 1.1969e-01, PNorm = 76.6502, GNorm = 0.7255, lr_0 = 4.0274e-04
Loss = 1.1835e-01, PNorm = 76.6630, GNorm = 0.6654, lr_0 = 4.0247e-04
Loss = 1.1258e-01, PNorm = 76.6738, GNorm = 0.6093, lr_0 = 4.0219e-04
Loss = 1.0947e-01, PNorm = 76.6801, GNorm = 0.9166, lr_0 = 4.0192e-04
Loss = 1.3257e-01, PNorm = 76.6916, GNorm = 0.8076, lr_0 = 4.0164e-04
Loss = 1.2055e-01, PNorm = 76.7046, GNorm = 0.4692, lr_0 = 4.0137e-04
Loss = 1.2313e-01, PNorm = 76.7126, GNorm = 0.7542, lr_0 = 4.0109e-04
Loss = 1.2269e-01, PNorm = 76.7242, GNorm = 0.9221, lr_0 = 4.0082e-04
Loss = 1.2649e-01, PNorm = 76.7323, GNorm = 0.7458, lr_0 = 4.0054e-04
Loss = 1.1901e-01, PNorm = 76.7475, GNorm = 0.9586, lr_0 = 4.0027e-04
Loss = 1.3099e-01, PNorm = 76.7610, GNorm = 0.9604, lr_0 = 3.9999e-04
Loss = 1.1840e-01, PNorm = 76.7734, GNorm = 0.6739, lr_0 = 3.9972e-04
Loss = 1.2778e-01, PNorm = 76.7909, GNorm = 0.9464, lr_0 = 3.9945e-04
Loss = 1.0381e-01, PNorm = 76.8010, GNorm = 0.5970, lr_0 = 3.9917e-04
Loss = 1.1633e-01, PNorm = 76.8057, GNorm = 0.8491, lr_0 = 3.9890e-04
Loss = 1.0612e-01, PNorm = 76.8174, GNorm = 0.6352, lr_0 = 3.9863e-04
Loss = 1.2444e-01, PNorm = 76.8274, GNorm = 0.6693, lr_0 = 3.9835e-04
Loss = 1.0412e-01, PNorm = 76.8357, GNorm = 0.4318, lr_0 = 3.9808e-04
Loss = 9.2879e-02, PNorm = 76.8459, GNorm = 0.4865, lr_0 = 3.9781e-04
Loss = 1.0683e-01, PNorm = 76.8606, GNorm = 0.6812, lr_0 = 3.9753e-04
Loss = 1.1045e-01, PNorm = 76.8665, GNorm = 1.0304, lr_0 = 3.9726e-04
Loss = 1.0723e-01, PNorm = 76.8759, GNorm = 0.6240, lr_0 = 3.9699e-04
Loss = 1.0466e-01, PNorm = 76.8841, GNorm = 0.5920, lr_0 = 3.9672e-04
Loss = 1.1286e-01, PNorm = 76.8907, GNorm = 1.0568, lr_0 = 3.9645e-04
Loss = 1.1380e-01, PNorm = 76.9001, GNorm = 0.8646, lr_0 = 3.9617e-04
Loss = 1.0241e-01, PNorm = 76.9079, GNorm = 0.5995, lr_0 = 3.9590e-04
Loss = 1.1399e-01, PNorm = 76.9211, GNorm = 1.0282, lr_0 = 3.9563e-04
Loss = 1.2240e-01, PNorm = 76.9296, GNorm = 0.5926, lr_0 = 3.9536e-04
Loss = 1.2015e-01, PNorm = 76.9393, GNorm = 0.6097, lr_0 = 3.9509e-04
Loss = 1.2582e-01, PNorm = 76.9459, GNorm = 0.5736, lr_0 = 3.9482e-04
Loss = 1.0481e-01, PNorm = 76.9553, GNorm = 0.8910, lr_0 = 3.9455e-04
Loss = 1.1220e-01, PNorm = 76.9670, GNorm = 0.6471, lr_0 = 3.9428e-04
Loss = 1.1366e-01, PNorm = 76.9784, GNorm = 0.6577, lr_0 = 3.9401e-04
Loss = 1.1744e-01, PNorm = 76.9917, GNorm = 0.7320, lr_0 = 3.9374e-04
Loss = 1.3075e-01, PNorm = 77.0011, GNorm = 0.6838, lr_0 = 3.9347e-04
Loss = 1.1142e-01, PNorm = 77.0089, GNorm = 0.5585, lr_0 = 3.9320e-04
Loss = 1.1995e-01, PNorm = 77.0186, GNorm = 0.5697, lr_0 = 3.9293e-04
Loss = 1.0342e-01, PNorm = 77.0320, GNorm = 0.8095, lr_0 = 3.9266e-04
Loss = 1.2942e-01, PNorm = 77.0483, GNorm = 0.7660, lr_0 = 3.9239e-04
Loss = 1.0173e-01, PNorm = 77.0568, GNorm = 0.6870, lr_0 = 3.9212e-04
Loss = 1.2307e-01, PNorm = 77.0635, GNorm = 0.8472, lr_0 = 3.9185e-04
Loss = 1.1754e-01, PNorm = 77.0728, GNorm = 0.6055, lr_0 = 3.9159e-04
Loss = 1.3499e-01, PNorm = 77.0813, GNorm = 0.7259, lr_0 = 3.9132e-04
Loss = 1.3533e-01, PNorm = 77.0906, GNorm = 0.6913, lr_0 = 3.9105e-04
Loss = 1.0725e-01, PNorm = 77.1005, GNorm = 0.6128, lr_0 = 3.9078e-04
Loss = 1.0690e-01, PNorm = 77.1104, GNorm = 0.8744, lr_0 = 3.9051e-04
Loss = 1.2850e-01, PNorm = 77.1251, GNorm = 0.8295, lr_0 = 3.9025e-04
Loss = 1.2925e-01, PNorm = 77.1340, GNorm = 0.7287, lr_0 = 3.8998e-04
Loss = 1.1883e-01, PNorm = 77.1456, GNorm = 0.7083, lr_0 = 3.8971e-04
Loss = 1.1726e-01, PNorm = 77.1531, GNorm = 0.8485, lr_0 = 3.8945e-04
Loss = 1.3102e-01, PNorm = 77.1641, GNorm = 0.6502, lr_0 = 3.8918e-04
Loss = 1.1380e-01, PNorm = 77.1699, GNorm = 0.8118, lr_0 = 3.8891e-04
Loss = 1.2350e-01, PNorm = 77.1817, GNorm = 0.5340, lr_0 = 3.8865e-04
Loss = 1.2517e-01, PNorm = 77.1927, GNorm = 0.6143, lr_0 = 3.8838e-04
Loss = 1.1321e-01, PNorm = 77.2035, GNorm = 0.5941, lr_0 = 3.8811e-04
Loss = 1.2512e-01, PNorm = 77.2134, GNorm = 0.8651, lr_0 = 3.8785e-04
Loss = 1.0416e-01, PNorm = 77.2239, GNorm = 0.7748, lr_0 = 3.8758e-04
Loss = 1.0036e-01, PNorm = 77.2328, GNorm = 0.7403, lr_0 = 3.8732e-04
Loss = 1.0883e-01, PNorm = 77.2364, GNorm = 0.4703, lr_0 = 3.8705e-04
Loss = 1.1517e-01, PNorm = 77.2463, GNorm = 0.6447, lr_0 = 3.8679e-04
Loss = 1.3131e-01, PNorm = 77.2526, GNorm = 0.5960, lr_0 = 3.8652e-04
Loss = 1.2796e-01, PNorm = 77.2610, GNorm = 0.7599, lr_0 = 3.8626e-04
Loss = 1.1514e-01, PNorm = 77.2722, GNorm = 0.7095, lr_0 = 3.8599e-04
Loss = 1.2465e-01, PNorm = 77.2755, GNorm = 0.6658, lr_0 = 3.8573e-04
Loss = 1.1607e-01, PNorm = 77.2829, GNorm = 0.5094, lr_0 = 3.8546e-04
Loss = 1.1385e-01, PNorm = 77.2942, GNorm = 0.8761, lr_0 = 3.8520e-04
Loss = 1.3855e-01, PNorm = 77.3073, GNorm = 1.1194, lr_0 = 3.8493e-04
Loss = 1.1788e-01, PNorm = 77.3216, GNorm = 0.7149, lr_0 = 3.8467e-04
Loss = 1.3675e-01, PNorm = 77.3369, GNorm = 1.3675, lr_0 = 3.8441e-04
Loss = 1.1816e-01, PNorm = 77.3470, GNorm = 0.8371, lr_0 = 3.8414e-04
Loss = 1.2298e-01, PNorm = 77.3559, GNorm = 0.7453, lr_0 = 3.8388e-04
Loss = 1.1805e-01, PNorm = 77.3658, GNorm = 0.7695, lr_0 = 3.8362e-04
Loss = 1.3106e-01, PNorm = 77.3722, GNorm = 0.6584, lr_0 = 3.8336e-04
Loss = 1.2140e-01, PNorm = 77.3857, GNorm = 1.0058, lr_0 = 3.8309e-04
Loss = 1.3520e-01, PNorm = 77.4053, GNorm = 0.5362, lr_0 = 3.8283e-04
Loss = 1.2385e-01, PNorm = 77.4148, GNorm = 0.7615, lr_0 = 3.8257e-04
Loss = 1.1652e-01, PNorm = 77.4213, GNorm = 0.8017, lr_0 = 3.8231e-04
Loss = 1.0790e-01, PNorm = 77.4258, GNorm = 0.7037, lr_0 = 3.8204e-04
Loss = 1.3056e-01, PNorm = 77.4367, GNorm = 0.8665, lr_0 = 3.8178e-04
Loss = 1.0070e-01, PNorm = 77.4499, GNorm = 0.5507, lr_0 = 3.8152e-04
Loss = 1.2299e-01, PNorm = 77.4578, GNorm = 0.5973, lr_0 = 3.8126e-04
Loss = 1.3717e-01, PNorm = 77.4676, GNorm = 0.7156, lr_0 = 3.8100e-04
Loss = 1.1836e-01, PNorm = 77.4787, GNorm = 0.8439, lr_0 = 3.8074e-04
Loss = 1.1909e-01, PNorm = 77.4839, GNorm = 0.6249, lr_0 = 3.8048e-04
Loss = 1.1893e-01, PNorm = 77.4923, GNorm = 0.7814, lr_0 = 3.8022e-04
Loss = 1.1668e-01, PNorm = 77.5025, GNorm = 0.9202, lr_0 = 3.7995e-04
Loss = 1.0598e-01, PNorm = 77.5165, GNorm = 0.7099, lr_0 = 3.7969e-04
Loss = 1.2519e-01, PNorm = 77.5244, GNorm = 0.8405, lr_0 = 3.7943e-04
Loss = 1.1675e-01, PNorm = 77.5342, GNorm = 0.7265, lr_0 = 3.7917e-04
Loss = 1.3592e-01, PNorm = 77.5407, GNorm = 0.6271, lr_0 = 3.7891e-04
Loss = 1.1699e-01, PNorm = 77.5500, GNorm = 0.7737, lr_0 = 3.7866e-04
Loss = 1.0612e-01, PNorm = 77.5634, GNorm = 0.7136, lr_0 = 3.7840e-04
Loss = 1.0540e-01, PNorm = 77.5748, GNorm = 0.6866, lr_0 = 3.7814e-04
Loss = 1.2830e-01, PNorm = 77.5885, GNorm = 0.6989, lr_0 = 3.7788e-04
Loss = 1.1790e-01, PNorm = 77.6004, GNorm = 0.8154, lr_0 = 3.7762e-04
Loss = 1.3042e-01, PNorm = 77.6070, GNorm = 0.5644, lr_0 = 3.7736e-04
Loss = 1.2799e-01, PNorm = 77.6106, GNorm = 0.8319, lr_0 = 3.7710e-04
Loss = 1.1151e-01, PNorm = 77.6177, GNorm = 0.6016, lr_0 = 3.7684e-04
Loss = 1.2583e-01, PNorm = 77.6280, GNorm = 0.8425, lr_0 = 3.7659e-04
Loss = 1.2096e-01, PNorm = 77.6388, GNorm = 0.6270, lr_0 = 3.7633e-04
Loss = 1.2057e-01, PNorm = 77.6524, GNorm = 0.6664, lr_0 = 3.7607e-04
Loss = 1.4076e-01, PNorm = 77.6640, GNorm = 0.8998, lr_0 = 3.7581e-04
Loss = 1.3136e-01, PNorm = 77.6782, GNorm = 0.7895, lr_0 = 3.7555e-04
Loss = 1.0279e-01, PNorm = 77.6872, GNorm = 0.5469, lr_0 = 3.7530e-04
Loss = 1.2088e-01, PNorm = 77.6976, GNorm = 0.6339, lr_0 = 3.7504e-04
Loss = 1.3490e-01, PNorm = 77.7017, GNorm = 0.7736, lr_0 = 3.7478e-04
Loss = 1.2014e-01, PNorm = 77.7119, GNorm = 0.5794, lr_0 = 3.7453e-04
Loss = 1.3923e-01, PNorm = 77.7217, GNorm = 0.9447, lr_0 = 3.7427e-04
Loss = 1.2260e-01, PNorm = 77.7300, GNorm = 0.7532, lr_0 = 3.7401e-04
Loss = 1.2671e-01, PNorm = 77.7395, GNorm = 0.8917, lr_0 = 3.7376e-04
Loss = 1.1273e-01, PNorm = 77.7506, GNorm = 0.9178, lr_0 = 3.7350e-04
Loss = 1.1893e-01, PNorm = 77.7594, GNorm = 0.5911, lr_0 = 3.7325e-04
Loss = 1.3474e-01, PNorm = 77.7658, GNorm = 0.6431, lr_0 = 3.7299e-04
Loss = 1.1832e-01, PNorm = 77.7751, GNorm = 0.7525, lr_0 = 3.7273e-04
Validation mae = 0.233207
Epoch 14
Loss = 1.2115e-01, PNorm = 77.7853, GNorm = 0.8019, lr_0 = 3.7248e-04
Loss = 1.2428e-01, PNorm = 77.7942, GNorm = 0.7978, lr_0 = 3.7222e-04
Loss = 1.0419e-01, PNorm = 77.8080, GNorm = 0.6665, lr_0 = 3.7197e-04
Loss = 1.0240e-01, PNorm = 77.8167, GNorm = 0.6293, lr_0 = 3.7171e-04
Loss = 9.3084e-02, PNorm = 77.8241, GNorm = 0.5404, lr_0 = 3.7146e-04
Loss = 1.1763e-01, PNorm = 77.8367, GNorm = 0.6614, lr_0 = 3.7120e-04
Loss = 1.1787e-01, PNorm = 77.8498, GNorm = 0.7837, lr_0 = 3.7095e-04
Loss = 9.6564e-02, PNorm = 77.8663, GNorm = 0.5893, lr_0 = 3.7070e-04
Loss = 1.1523e-01, PNorm = 77.8782, GNorm = 1.0006, lr_0 = 3.7044e-04
Loss = 1.1585e-01, PNorm = 77.8872, GNorm = 1.0178, lr_0 = 3.7019e-04
Loss = 1.1232e-01, PNorm = 77.8977, GNorm = 0.7450, lr_0 = 3.6993e-04
Loss = 1.0446e-01, PNorm = 77.9019, GNorm = 0.6425, lr_0 = 3.6968e-04
Loss = 1.0019e-01, PNorm = 77.9068, GNorm = 0.7471, lr_0 = 3.6943e-04
Loss = 1.0464e-01, PNorm = 77.9151, GNorm = 0.6072, lr_0 = 3.6917e-04
Loss = 1.0329e-01, PNorm = 77.9221, GNorm = 0.7918, lr_0 = 3.6892e-04
Loss = 9.9080e-02, PNorm = 77.9360, GNorm = 1.0353, lr_0 = 3.6867e-04
Loss = 1.0600e-01, PNorm = 77.9479, GNorm = 0.5856, lr_0 = 3.6842e-04
Loss = 1.0449e-01, PNorm = 77.9523, GNorm = 1.0230, lr_0 = 3.6816e-04
Loss = 1.0779e-01, PNorm = 77.9614, GNorm = 0.5924, lr_0 = 3.6791e-04
Loss = 1.1430e-01, PNorm = 77.9688, GNorm = 0.6079, lr_0 = 3.6766e-04
Loss = 1.1650e-01, PNorm = 77.9781, GNorm = 0.6958, lr_0 = 3.6741e-04
Loss = 1.0650e-01, PNorm = 77.9892, GNorm = 0.5396, lr_0 = 3.6716e-04
Loss = 1.1301e-01, PNorm = 77.9959, GNorm = 0.6549, lr_0 = 3.6690e-04
Loss = 1.1488e-01, PNorm = 78.0052, GNorm = 0.9087, lr_0 = 3.6665e-04
Loss = 1.2786e-01, PNorm = 78.0147, GNorm = 0.9399, lr_0 = 3.6640e-04
Loss = 1.1674e-01, PNorm = 78.0255, GNorm = 0.7152, lr_0 = 3.6615e-04
Loss = 1.1533e-01, PNorm = 78.0373, GNorm = 0.7217, lr_0 = 3.6590e-04
Loss = 1.1469e-01, PNorm = 78.0460, GNorm = 0.7307, lr_0 = 3.6565e-04
Loss = 9.7997e-02, PNorm = 78.0478, GNorm = 0.9118, lr_0 = 3.6540e-04
Loss = 1.2283e-01, PNorm = 78.0507, GNorm = 0.4811, lr_0 = 3.6515e-04
Loss = 1.2181e-01, PNorm = 78.0614, GNorm = 0.5817, lr_0 = 3.6490e-04
Loss = 1.0820e-01, PNorm = 78.0719, GNorm = 0.5294, lr_0 = 3.6465e-04
Loss = 1.1303e-01, PNorm = 78.0810, GNorm = 0.6460, lr_0 = 3.6440e-04
Loss = 1.1400e-01, PNorm = 78.0908, GNorm = 0.6060, lr_0 = 3.6415e-04
Loss = 1.0444e-01, PNorm = 78.1010, GNorm = 0.6378, lr_0 = 3.6390e-04
Loss = 1.0932e-01, PNorm = 78.1072, GNorm = 1.1583, lr_0 = 3.6365e-04
Loss = 1.2988e-01, PNorm = 78.1170, GNorm = 0.9103, lr_0 = 3.6340e-04
Loss = 1.0888e-01, PNorm = 78.1263, GNorm = 0.5426, lr_0 = 3.6315e-04
Loss = 1.0214e-01, PNorm = 78.1337, GNorm = 0.6451, lr_0 = 3.6290e-04
Loss = 1.1034e-01, PNorm = 78.1415, GNorm = 0.7041, lr_0 = 3.6266e-04
Loss = 1.2313e-01, PNorm = 78.1478, GNorm = 0.6766, lr_0 = 3.6241e-04
Loss = 1.1465e-01, PNorm = 78.1604, GNorm = 0.5705, lr_0 = 3.6216e-04
Loss = 1.0802e-01, PNorm = 78.1696, GNorm = 0.5595, lr_0 = 3.6191e-04
Loss = 1.0618e-01, PNorm = 78.1807, GNorm = 0.4919, lr_0 = 3.6166e-04
Loss = 1.1654e-01, PNorm = 78.1910, GNorm = 0.5275, lr_0 = 3.6141e-04
Loss = 1.0700e-01, PNorm = 78.2005, GNorm = 0.6408, lr_0 = 3.6117e-04
Loss = 1.3101e-01, PNorm = 78.2078, GNorm = 0.8776, lr_0 = 3.6092e-04
Loss = 1.1436e-01, PNorm = 78.2132, GNorm = 0.6168, lr_0 = 3.6067e-04
Loss = 1.1157e-01, PNorm = 78.2229, GNorm = 1.1650, lr_0 = 3.6043e-04
Loss = 1.1903e-01, PNorm = 78.2299, GNorm = 0.6130, lr_0 = 3.6018e-04
Loss = 1.1642e-01, PNorm = 78.2395, GNorm = 1.0380, lr_0 = 3.5993e-04
Loss = 1.0657e-01, PNorm = 78.2468, GNorm = 0.6323, lr_0 = 3.5969e-04
Loss = 1.2149e-01, PNorm = 78.2561, GNorm = 0.7477, lr_0 = 3.5944e-04
Loss = 1.2270e-01, PNorm = 78.2637, GNorm = 0.6613, lr_0 = 3.5919e-04
Loss = 9.9780e-02, PNorm = 78.2729, GNorm = 0.9378, lr_0 = 3.5895e-04
Loss = 1.1119e-01, PNorm = 78.2795, GNorm = 0.7233, lr_0 = 3.5870e-04
Loss = 1.1455e-01, PNorm = 78.2876, GNorm = 0.5745, lr_0 = 3.5845e-04
Loss = 1.3796e-01, PNorm = 78.2935, GNorm = 0.6790, lr_0 = 3.5821e-04
Loss = 1.2307e-01, PNorm = 78.3015, GNorm = 0.7347, lr_0 = 3.5796e-04
Loss = 1.1269e-01, PNorm = 78.3112, GNorm = 0.4688, lr_0 = 3.5772e-04
Loss = 1.1595e-01, PNorm = 78.3215, GNorm = 0.5205, lr_0 = 3.5747e-04
Loss = 1.0835e-01, PNorm = 78.3334, GNorm = 0.6299, lr_0 = 3.5723e-04
Loss = 1.0546e-01, PNorm = 78.3408, GNorm = 0.5390, lr_0 = 3.5698e-04
Loss = 1.1824e-01, PNorm = 78.3520, GNorm = 1.1300, lr_0 = 3.5674e-04
Loss = 1.1506e-01, PNorm = 78.3642, GNorm = 0.8468, lr_0 = 3.5650e-04
Loss = 1.0942e-01, PNorm = 78.3730, GNorm = 0.5585, lr_0 = 3.5625e-04
Loss = 1.0892e-01, PNorm = 78.3778, GNorm = 0.6707, lr_0 = 3.5601e-04
Loss = 1.0952e-01, PNorm = 78.3822, GNorm = 0.6868, lr_0 = 3.5576e-04
Loss = 1.0207e-01, PNorm = 78.3877, GNorm = 0.6182, lr_0 = 3.5552e-04
Loss = 1.0692e-01, PNorm = 78.3988, GNorm = 0.5869, lr_0 = 3.5528e-04
Loss = 1.1789e-01, PNorm = 78.4067, GNorm = 0.7871, lr_0 = 3.5503e-04
Loss = 1.0080e-01, PNorm = 78.4122, GNorm = 0.6015, lr_0 = 3.5479e-04
Loss = 1.2474e-01, PNorm = 78.4203, GNorm = 0.8199, lr_0 = 3.5455e-04
Loss = 1.2796e-01, PNorm = 78.4349, GNorm = 0.7704, lr_0 = 3.5430e-04
Loss = 1.1985e-01, PNorm = 78.4407, GNorm = 0.7736, lr_0 = 3.5406e-04
Loss = 1.1981e-01, PNorm = 78.4494, GNorm = 1.1137, lr_0 = 3.5382e-04
Loss = 1.1942e-01, PNorm = 78.4558, GNorm = 0.7221, lr_0 = 3.5358e-04
Loss = 1.0720e-01, PNorm = 78.4633, GNorm = 0.4212, lr_0 = 3.5333e-04
Loss = 1.2555e-01, PNorm = 78.4744, GNorm = 0.6479, lr_0 = 3.5309e-04
Loss = 1.3268e-01, PNorm = 78.4828, GNorm = 0.7970, lr_0 = 3.5285e-04
Loss = 1.1385e-01, PNorm = 78.4914, GNorm = 0.5174, lr_0 = 3.5261e-04
Loss = 1.0317e-01, PNorm = 78.4957, GNorm = 0.7236, lr_0 = 3.5237e-04
Loss = 1.0949e-01, PNorm = 78.5035, GNorm = 0.6628, lr_0 = 3.5212e-04
Loss = 1.0711e-01, PNorm = 78.5104, GNorm = 0.5072, lr_0 = 3.5188e-04
Loss = 1.0482e-01, PNorm = 78.5250, GNorm = 0.6952, lr_0 = 3.5164e-04
Loss = 1.0415e-01, PNorm = 78.5361, GNorm = 0.6231, lr_0 = 3.5140e-04
Loss = 1.3598e-01, PNorm = 78.5435, GNorm = 0.5116, lr_0 = 3.5116e-04
Loss = 1.2357e-01, PNorm = 78.5539, GNorm = 0.7123, lr_0 = 3.5092e-04
Loss = 1.1369e-01, PNorm = 78.5634, GNorm = 0.6148, lr_0 = 3.5068e-04
Loss = 1.2475e-01, PNorm = 78.5654, GNorm = 0.5522, lr_0 = 3.5044e-04
Loss = 1.2952e-01, PNorm = 78.5712, GNorm = 0.5149, lr_0 = 3.5020e-04
Loss = 1.2406e-01, PNorm = 78.5791, GNorm = 0.8436, lr_0 = 3.4996e-04
Loss = 1.1102e-01, PNorm = 78.5881, GNorm = 0.5386, lr_0 = 3.4972e-04
Loss = 1.0033e-01, PNorm = 78.5937, GNorm = 0.7896, lr_0 = 3.4948e-04
Loss = 1.1108e-01, PNorm = 78.6023, GNorm = 0.7586, lr_0 = 3.4924e-04
Loss = 1.2137e-01, PNorm = 78.6084, GNorm = 0.6199, lr_0 = 3.4900e-04
Loss = 1.0663e-01, PNorm = 78.6177, GNorm = 0.7149, lr_0 = 3.4876e-04
Loss = 1.1739e-01, PNorm = 78.6278, GNorm = 0.6452, lr_0 = 3.4852e-04
Loss = 1.1138e-01, PNorm = 78.6330, GNorm = 0.6668, lr_0 = 3.4828e-04
Loss = 1.2014e-01, PNorm = 78.6401, GNorm = 0.6534, lr_0 = 3.4805e-04
Loss = 1.1699e-01, PNorm = 78.6472, GNorm = 0.9733, lr_0 = 3.4781e-04
Loss = 1.1916e-01, PNorm = 78.6517, GNorm = 0.9800, lr_0 = 3.4757e-04
Loss = 1.2588e-01, PNorm = 78.6589, GNorm = 0.6428, lr_0 = 3.4733e-04
Loss = 1.1972e-01, PNorm = 78.6671, GNorm = 0.7582, lr_0 = 3.4709e-04
Loss = 1.0211e-01, PNorm = 78.6715, GNorm = 0.5947, lr_0 = 3.4686e-04
Loss = 1.1303e-01, PNorm = 78.6815, GNorm = 0.9411, lr_0 = 3.4662e-04
Loss = 1.2389e-01, PNorm = 78.6914, GNorm = 0.8001, lr_0 = 3.4638e-04
Loss = 1.1337e-01, PNorm = 78.6992, GNorm = 0.5737, lr_0 = 3.4614e-04
Loss = 1.0318e-01, PNorm = 78.7099, GNorm = 0.6452, lr_0 = 3.4591e-04
Loss = 1.2253e-01, PNorm = 78.7159, GNorm = 0.5225, lr_0 = 3.4567e-04
Loss = 1.1814e-01, PNorm = 78.7200, GNorm = 0.7335, lr_0 = 3.4543e-04
Loss = 1.1846e-01, PNorm = 78.7264, GNorm = 0.5526, lr_0 = 3.4520e-04
Loss = 1.0999e-01, PNorm = 78.7294, GNorm = 0.7948, lr_0 = 3.4496e-04
Loss = 1.0812e-01, PNorm = 78.7369, GNorm = 0.7526, lr_0 = 3.4472e-04
Loss = 1.1309e-01, PNorm = 78.7452, GNorm = 0.5021, lr_0 = 3.4449e-04
Loss = 1.1071e-01, PNorm = 78.7538, GNorm = 0.6489, lr_0 = 3.4425e-04
Loss = 1.0463e-01, PNorm = 78.7609, GNorm = 0.9251, lr_0 = 3.4402e-04
Loss = 1.1320e-01, PNorm = 78.7677, GNorm = 0.6188, lr_0 = 3.4378e-04
Loss = 1.0108e-01, PNorm = 78.7743, GNorm = 0.4679, lr_0 = 3.4354e-04
Loss = 1.0586e-01, PNorm = 78.7833, GNorm = 0.8449, lr_0 = 3.4331e-04
Validation mae = 0.236047
Epoch 15
Loss = 1.1257e-01, PNorm = 78.7886, GNorm = 0.7049, lr_0 = 3.4307e-04
Loss = 1.0507e-01, PNorm = 78.7971, GNorm = 0.6527, lr_0 = 3.4284e-04
Loss = 9.6309e-02, PNorm = 78.8050, GNorm = 0.6714, lr_0 = 3.4260e-04
Loss = 9.9045e-02, PNorm = 78.8132, GNorm = 0.6689, lr_0 = 3.4237e-04
Loss = 1.0819e-01, PNorm = 78.8204, GNorm = 0.6862, lr_0 = 3.4213e-04
Loss = 1.0490e-01, PNorm = 78.8301, GNorm = 0.5208, lr_0 = 3.4190e-04
Loss = 9.6871e-02, PNorm = 78.8365, GNorm = 0.6962, lr_0 = 3.4167e-04
Loss = 9.7372e-02, PNorm = 78.8407, GNorm = 0.5382, lr_0 = 3.4143e-04
Loss = 1.1450e-01, PNorm = 78.8479, GNorm = 0.9023, lr_0 = 3.4120e-04
Loss = 1.1357e-01, PNorm = 78.8572, GNorm = 0.8079, lr_0 = 3.4096e-04
Loss = 1.1064e-01, PNorm = 78.8662, GNorm = 0.5949, lr_0 = 3.4073e-04
Loss = 1.2013e-01, PNorm = 78.8752, GNorm = 0.5784, lr_0 = 3.4050e-04
Loss = 1.1340e-01, PNorm = 78.8845, GNorm = 0.7996, lr_0 = 3.4026e-04
Loss = 1.0967e-01, PNorm = 78.8953, GNorm = 0.7305, lr_0 = 3.4003e-04
Loss = 1.0539e-01, PNorm = 78.9029, GNorm = 0.4867, lr_0 = 3.3980e-04
Loss = 1.0457e-01, PNorm = 78.9080, GNorm = 0.4922, lr_0 = 3.3956e-04
Loss = 1.0007e-01, PNorm = 78.9199, GNorm = 0.4904, lr_0 = 3.3933e-04
Loss = 1.0699e-01, PNorm = 78.9287, GNorm = 0.8261, lr_0 = 3.3910e-04
Loss = 1.1571e-01, PNorm = 78.9382, GNorm = 0.5915, lr_0 = 3.3887e-04
Loss = 1.1150e-01, PNorm = 78.9468, GNorm = 0.6690, lr_0 = 3.3864e-04
Loss = 1.1305e-01, PNorm = 78.9553, GNorm = 0.5962, lr_0 = 3.3840e-04
Loss = 1.0279e-01, PNorm = 78.9604, GNorm = 0.7704, lr_0 = 3.3817e-04
Loss = 1.0414e-01, PNorm = 78.9670, GNorm = 0.6352, lr_0 = 3.3794e-04
Loss = 1.1740e-01, PNorm = 78.9740, GNorm = 0.8215, lr_0 = 3.3771e-04
Loss = 1.1460e-01, PNorm = 78.9843, GNorm = 1.2091, lr_0 = 3.3748e-04
Loss = 1.0679e-01, PNorm = 78.9928, GNorm = 0.6989, lr_0 = 3.3725e-04
Loss = 1.1154e-01, PNorm = 79.0006, GNorm = 0.7380, lr_0 = 3.3701e-04
Loss = 1.0075e-01, PNorm = 79.0073, GNorm = 0.5661, lr_0 = 3.3678e-04
Loss = 8.9015e-02, PNorm = 79.0132, GNorm = 0.5688, lr_0 = 3.3655e-04
Loss = 1.0935e-01, PNorm = 79.0193, GNorm = 0.5799, lr_0 = 3.3632e-04
Loss = 1.1659e-01, PNorm = 79.0276, GNorm = 0.7300, lr_0 = 3.3609e-04
Loss = 9.6641e-02, PNorm = 79.0334, GNorm = 0.7372, lr_0 = 3.3586e-04
Loss = 1.2129e-01, PNorm = 79.0436, GNorm = 0.7239, lr_0 = 3.3563e-04
Loss = 1.1261e-01, PNorm = 79.0543, GNorm = 0.6055, lr_0 = 3.3540e-04
Loss = 1.0444e-01, PNorm = 79.0638, GNorm = 0.8323, lr_0 = 3.3517e-04
Loss = 1.0909e-01, PNorm = 79.0720, GNorm = 0.8144, lr_0 = 3.3494e-04
Loss = 1.1588e-01, PNorm = 79.0812, GNorm = 0.8601, lr_0 = 3.3471e-04
Loss = 1.0852e-01, PNorm = 79.0896, GNorm = 0.6767, lr_0 = 3.3448e-04
Loss = 1.0917e-01, PNorm = 79.0983, GNorm = 0.7169, lr_0 = 3.3425e-04
Loss = 8.9310e-02, PNorm = 79.1081, GNorm = 0.7951, lr_0 = 3.3403e-04
Loss = 1.0574e-01, PNorm = 79.1183, GNorm = 0.5165, lr_0 = 3.3380e-04
Loss = 1.0958e-01, PNorm = 79.1268, GNorm = 0.6445, lr_0 = 3.3357e-04
Loss = 1.1085e-01, PNorm = 79.1326, GNorm = 0.7117, lr_0 = 3.3334e-04
Loss = 1.1366e-01, PNorm = 79.1389, GNorm = 0.8939, lr_0 = 3.3311e-04
Loss = 1.0621e-01, PNorm = 79.1450, GNorm = 0.5338, lr_0 = 3.3288e-04
Loss = 1.0325e-01, PNorm = 79.1532, GNorm = 0.7045, lr_0 = 3.3265e-04
Loss = 1.0817e-01, PNorm = 79.1601, GNorm = 0.7465, lr_0 = 3.3243e-04
Loss = 1.2493e-01, PNorm = 79.1665, GNorm = 0.7444, lr_0 = 3.3220e-04
Loss = 1.1170e-01, PNorm = 79.1762, GNorm = 0.5576, lr_0 = 3.3197e-04
Loss = 1.2276e-01, PNorm = 79.1836, GNorm = 0.6508, lr_0 = 3.3174e-04
Loss = 1.0893e-01, PNorm = 79.1966, GNorm = 0.8144, lr_0 = 3.3152e-04
Loss = 1.2066e-01, PNorm = 79.2017, GNorm = 0.7418, lr_0 = 3.3129e-04
Loss = 1.0803e-01, PNorm = 79.2078, GNorm = 0.5029, lr_0 = 3.3106e-04
Loss = 9.9983e-02, PNorm = 79.2166, GNorm = 0.7535, lr_0 = 3.3084e-04
Loss = 1.2550e-01, PNorm = 79.2267, GNorm = 0.8392, lr_0 = 3.3061e-04
Loss = 1.0750e-01, PNorm = 79.2349, GNorm = 0.6932, lr_0 = 3.3038e-04
Loss = 9.7478e-02, PNorm = 79.2420, GNorm = 0.7457, lr_0 = 3.3016e-04
Loss = 1.0983e-01, PNorm = 79.2472, GNorm = 0.5915, lr_0 = 3.2993e-04
Loss = 1.0300e-01, PNorm = 79.2542, GNorm = 0.7540, lr_0 = 3.2970e-04
Loss = 1.0438e-01, PNorm = 79.2607, GNorm = 0.7386, lr_0 = 3.2948e-04
Loss = 8.8679e-02, PNorm = 79.2698, GNorm = 0.6856, lr_0 = 3.2925e-04
Loss = 1.2532e-01, PNorm = 79.2789, GNorm = 0.7974, lr_0 = 3.2903e-04
Loss = 1.0731e-01, PNorm = 79.2851, GNorm = 0.5679, lr_0 = 3.2880e-04
Loss = 1.0353e-01, PNorm = 79.2939, GNorm = 1.1181, lr_0 = 3.2858e-04
Loss = 1.1416e-01, PNorm = 79.3016, GNorm = 0.5357, lr_0 = 3.2835e-04
Loss = 1.0809e-01, PNorm = 79.3118, GNorm = 0.7731, lr_0 = 3.2813e-04
Loss = 1.0698e-01, PNorm = 79.3208, GNorm = 0.7927, lr_0 = 3.2790e-04
Loss = 9.7947e-02, PNorm = 79.3260, GNorm = 0.8231, lr_0 = 3.2768e-04
Loss = 1.0629e-01, PNorm = 79.3311, GNorm = 0.7241, lr_0 = 3.2745e-04
Loss = 1.0361e-01, PNorm = 79.3405, GNorm = 1.0821, lr_0 = 3.2723e-04
Loss = 1.0789e-01, PNorm = 79.3489, GNorm = 0.4866, lr_0 = 3.2700e-04
Loss = 1.0713e-01, PNorm = 79.3525, GNorm = 0.9590, lr_0 = 3.2678e-04
Loss = 1.0126e-01, PNorm = 79.3560, GNorm = 0.9303, lr_0 = 3.2656e-04
Loss = 1.1520e-01, PNorm = 79.3606, GNorm = 0.7314, lr_0 = 3.2633e-04
Loss = 1.1383e-01, PNorm = 79.3696, GNorm = 0.6072, lr_0 = 3.2611e-04
Loss = 1.1812e-01, PNorm = 79.3773, GNorm = 0.7594, lr_0 = 3.2589e-04
Loss = 1.1881e-01, PNorm = 79.3787, GNorm = 0.8426, lr_0 = 3.2566e-04
Loss = 1.1273e-01, PNorm = 79.3850, GNorm = 1.0581, lr_0 = 3.2544e-04
Loss = 1.1406e-01, PNorm = 79.3903, GNorm = 0.5643, lr_0 = 3.2522e-04
Loss = 1.1333e-01, PNorm = 79.3972, GNorm = 0.6659, lr_0 = 3.2499e-04
Loss = 1.0222e-01, PNorm = 79.4035, GNorm = 0.7188, lr_0 = 3.2477e-04
Loss = 1.0789e-01, PNorm = 79.4140, GNorm = 0.5339, lr_0 = 3.2455e-04
Loss = 8.6548e-02, PNorm = 79.4233, GNorm = 0.5276, lr_0 = 3.2433e-04
Loss = 1.0788e-01, PNorm = 79.4320, GNorm = 0.5593, lr_0 = 3.2410e-04
Loss = 1.0834e-01, PNorm = 79.4362, GNorm = 0.7057, lr_0 = 3.2388e-04
Loss = 1.0497e-01, PNorm = 79.4430, GNorm = 0.6372, lr_0 = 3.2366e-04
Loss = 1.0591e-01, PNorm = 79.4496, GNorm = 0.7708, lr_0 = 3.2344e-04
Loss = 1.1497e-01, PNorm = 79.4597, GNorm = 0.6157, lr_0 = 3.2322e-04
Loss = 1.1224e-01, PNorm = 79.4700, GNorm = 0.6531, lr_0 = 3.2300e-04
Loss = 1.1438e-01, PNorm = 79.4741, GNorm = 0.7796, lr_0 = 3.2277e-04
Loss = 1.2380e-01, PNorm = 79.4814, GNorm = 0.8622, lr_0 = 3.2255e-04
Loss = 1.0519e-01, PNorm = 79.4869, GNorm = 0.7424, lr_0 = 3.2233e-04
Loss = 1.1014e-01, PNorm = 79.4918, GNorm = 0.6730, lr_0 = 3.2211e-04
Loss = 9.9525e-02, PNorm = 79.4998, GNorm = 0.6108, lr_0 = 3.2189e-04
Loss = 1.1401e-01, PNorm = 79.5050, GNorm = 0.5886, lr_0 = 3.2167e-04
Loss = 1.1583e-01, PNorm = 79.5116, GNorm = 0.6620, lr_0 = 3.2145e-04
Loss = 1.0784e-01, PNorm = 79.5184, GNorm = 0.6208, lr_0 = 3.2123e-04
Loss = 1.0854e-01, PNorm = 79.5216, GNorm = 0.6569, lr_0 = 3.2101e-04
Loss = 1.0349e-01, PNorm = 79.5263, GNorm = 0.4749, lr_0 = 3.2079e-04
Loss = 1.1974e-01, PNorm = 79.5338, GNorm = 0.6404, lr_0 = 3.2057e-04
Loss = 1.0807e-01, PNorm = 79.5411, GNorm = 0.8588, lr_0 = 3.2035e-04
Loss = 9.7984e-02, PNorm = 79.5511, GNorm = 1.0627, lr_0 = 3.2013e-04
Loss = 1.1424e-01, PNorm = 79.5513, GNorm = 0.9725, lr_0 = 3.1991e-04
Loss = 1.1622e-01, PNorm = 79.5519, GNorm = 0.6171, lr_0 = 3.1969e-04
Loss = 1.1460e-01, PNorm = 79.5586, GNorm = 0.7893, lr_0 = 3.1947e-04
Loss = 1.0324e-01, PNorm = 79.5609, GNorm = 0.7625, lr_0 = 3.1925e-04
Loss = 1.1627e-01, PNorm = 79.5693, GNorm = 0.6844, lr_0 = 3.1904e-04
Loss = 1.1415e-01, PNorm = 79.5814, GNorm = 0.4985, lr_0 = 3.1882e-04
Loss = 1.1487e-01, PNorm = 79.5930, GNorm = 0.6768, lr_0 = 3.1860e-04
Loss = 1.1997e-01, PNorm = 79.6031, GNorm = 0.7278, lr_0 = 3.1838e-04
Loss = 1.1387e-01, PNorm = 79.6087, GNorm = 0.7338, lr_0 = 3.1816e-04
Loss = 9.1820e-02, PNorm = 79.6148, GNorm = 0.4614, lr_0 = 3.1794e-04
Loss = 1.1119e-01, PNorm = 79.6179, GNorm = 0.5851, lr_0 = 3.1773e-04
Loss = 1.0709e-01, PNorm = 79.6225, GNorm = 0.6114, lr_0 = 3.1751e-04
Loss = 1.3285e-01, PNorm = 79.6247, GNorm = 0.6424, lr_0 = 3.1729e-04
Loss = 1.0294e-01, PNorm = 79.6329, GNorm = 1.2555, lr_0 = 3.1707e-04
Loss = 1.0567e-01, PNorm = 79.6432, GNorm = 0.7368, lr_0 = 3.1686e-04
Loss = 1.3107e-01, PNorm = 79.6470, GNorm = 0.9028, lr_0 = 3.1664e-04
Loss = 1.0442e-01, PNorm = 79.6570, GNorm = 0.7196, lr_0 = 3.1642e-04
Loss = 1.0317e-01, PNorm = 79.6635, GNorm = 0.6339, lr_0 = 3.1621e-04
Validation mae = 0.231139
Epoch 16
Loss = 8.6986e-02, PNorm = 79.6747, GNorm = 0.5782, lr_0 = 3.1599e-04
Loss = 9.5413e-02, PNorm = 79.6825, GNorm = 0.8908, lr_0 = 3.1577e-04
Loss = 9.9405e-02, PNorm = 79.6891, GNorm = 0.5130, lr_0 = 3.1556e-04
Loss = 9.9799e-02, PNorm = 79.6981, GNorm = 0.7701, lr_0 = 3.1534e-04
Loss = 1.1052e-01, PNorm = 79.7046, GNorm = 0.7364, lr_0 = 3.1512e-04
Loss = 7.4495e-02, PNorm = 79.7107, GNorm = 0.4331, lr_0 = 3.1491e-04
Loss = 1.0315e-01, PNorm = 79.7173, GNorm = 0.6159, lr_0 = 3.1469e-04
Loss = 1.1156e-01, PNorm = 79.7219, GNorm = 1.1149, lr_0 = 3.1448e-04
Loss = 1.0379e-01, PNorm = 79.7345, GNorm = 0.6170, lr_0 = 3.1426e-04
Loss = 1.1024e-01, PNorm = 79.7470, GNorm = 0.6922, lr_0 = 3.1405e-04
Loss = 9.7835e-02, PNorm = 79.7584, GNorm = 0.7192, lr_0 = 3.1383e-04
Loss = 1.0512e-01, PNorm = 79.7665, GNorm = 0.7614, lr_0 = 3.1362e-04
Loss = 1.0805e-01, PNorm = 79.7751, GNorm = 0.6036, lr_0 = 3.1340e-04
Loss = 1.1318e-01, PNorm = 79.7863, GNorm = 0.8104, lr_0 = 3.1319e-04
Loss = 1.0345e-01, PNorm = 79.7939, GNorm = 0.6543, lr_0 = 3.1297e-04
Loss = 9.7915e-02, PNorm = 79.7994, GNorm = 0.6119, lr_0 = 3.1276e-04
Loss = 1.0936e-01, PNorm = 79.8051, GNorm = 0.6326, lr_0 = 3.1254e-04
Loss = 9.8098e-02, PNorm = 79.8110, GNorm = 0.7910, lr_0 = 3.1233e-04
Loss = 9.5879e-02, PNorm = 79.8156, GNorm = 0.5733, lr_0 = 3.1212e-04
Loss = 1.1169e-01, PNorm = 79.8223, GNorm = 0.6823, lr_0 = 3.1190e-04
Loss = 1.0363e-01, PNorm = 79.8309, GNorm = 0.7782, lr_0 = 3.1169e-04
Loss = 1.0877e-01, PNorm = 79.8389, GNorm = 0.6592, lr_0 = 3.1147e-04
Loss = 9.9594e-02, PNorm = 79.8459, GNorm = 0.9368, lr_0 = 3.1126e-04
Loss = 9.7355e-02, PNorm = 79.8578, GNorm = 0.8630, lr_0 = 3.1105e-04
Loss = 1.1484e-01, PNorm = 79.8647, GNorm = 0.6003, lr_0 = 3.1083e-04
Loss = 9.8838e-02, PNorm = 79.8706, GNorm = 0.5644, lr_0 = 3.1062e-04
Loss = 1.1521e-01, PNorm = 79.8798, GNorm = 0.8585, lr_0 = 3.1041e-04
Loss = 1.0479e-01, PNorm = 79.8896, GNorm = 0.5940, lr_0 = 3.1020e-04
Loss = 1.1531e-01, PNorm = 79.8996, GNorm = 0.6246, lr_0 = 3.0998e-04
Loss = 1.0910e-01, PNorm = 79.9077, GNorm = 1.1205, lr_0 = 3.0977e-04
Loss = 9.9004e-02, PNorm = 79.9137, GNorm = 0.6538, lr_0 = 3.0956e-04
Loss = 1.0394e-01, PNorm = 79.9238, GNorm = 0.8162, lr_0 = 3.0935e-04
Loss = 1.0602e-01, PNorm = 79.9345, GNorm = 0.6780, lr_0 = 3.0914e-04
Loss = 9.4637e-02, PNorm = 79.9380, GNorm = 0.5639, lr_0 = 3.0892e-04
Loss = 1.0779e-01, PNorm = 79.9394, GNorm = 0.7114, lr_0 = 3.0871e-04
Loss = 1.1072e-01, PNorm = 79.9437, GNorm = 0.6356, lr_0 = 3.0850e-04
Loss = 1.0451e-01, PNorm = 79.9519, GNorm = 0.6991, lr_0 = 3.0829e-04
Loss = 1.0709e-01, PNorm = 79.9599, GNorm = 0.9677, lr_0 = 3.0808e-04
Loss = 8.3741e-02, PNorm = 79.9648, GNorm = 0.6249, lr_0 = 3.0787e-04
Loss = 9.2593e-02, PNorm = 79.9701, GNorm = 0.7521, lr_0 = 3.0766e-04
Loss = 1.0919e-01, PNorm = 79.9757, GNorm = 0.6879, lr_0 = 3.0745e-04
Loss = 1.2363e-01, PNorm = 79.9838, GNorm = 0.6108, lr_0 = 3.0723e-04
Loss = 1.1269e-01, PNorm = 79.9906, GNorm = 0.7476, lr_0 = 3.0702e-04
Loss = 1.0662e-01, PNorm = 79.9994, GNorm = 0.7233, lr_0 = 3.0681e-04
Loss = 1.0880e-01, PNorm = 80.0083, GNorm = 0.8013, lr_0 = 3.0660e-04
Loss = 9.6579e-02, PNorm = 80.0163, GNorm = 0.9271, lr_0 = 3.0639e-04
Loss = 1.1019e-01, PNorm = 80.0225, GNorm = 0.5561, lr_0 = 3.0618e-04
Loss = 1.0024e-01, PNorm = 80.0290, GNorm = 0.9544, lr_0 = 3.0597e-04
Loss = 9.5870e-02, PNorm = 80.0353, GNorm = 0.7452, lr_0 = 3.0576e-04
Loss = 9.8831e-02, PNorm = 80.0383, GNorm = 0.4938, lr_0 = 3.0555e-04
Loss = 9.4302e-02, PNorm = 80.0429, GNorm = 0.6222, lr_0 = 3.0535e-04
Loss = 1.1281e-01, PNorm = 80.0505, GNorm = 0.6077, lr_0 = 3.0514e-04
Loss = 1.0310e-01, PNorm = 80.0589, GNorm = 0.9719, lr_0 = 3.0493e-04
Loss = 9.5851e-02, PNorm = 80.0654, GNorm = 0.5367, lr_0 = 3.0472e-04
Loss = 1.0506e-01, PNorm = 80.0714, GNorm = 0.7905, lr_0 = 3.0451e-04
Loss = 1.0824e-01, PNorm = 80.0748, GNorm = 0.6234, lr_0 = 3.0430e-04
Loss = 1.1247e-01, PNorm = 80.0793, GNorm = 0.5128, lr_0 = 3.0409e-04
Loss = 1.1102e-01, PNorm = 80.0882, GNorm = 0.7194, lr_0 = 3.0388e-04
Loss = 1.1872e-01, PNorm = 80.0952, GNorm = 0.7734, lr_0 = 3.0368e-04
Loss = 1.2078e-01, PNorm = 80.1037, GNorm = 0.6022, lr_0 = 3.0347e-04
Loss = 1.0183e-01, PNorm = 80.1140, GNorm = 0.6233, lr_0 = 3.0326e-04
Loss = 1.0368e-01, PNorm = 80.1241, GNorm = 0.7669, lr_0 = 3.0305e-04
Loss = 9.8019e-02, PNorm = 80.1317, GNorm = 0.8801, lr_0 = 3.0284e-04
Loss = 1.0302e-01, PNorm = 80.1317, GNorm = 0.8958, lr_0 = 3.0264e-04
Loss = 1.0951e-01, PNorm = 80.1365, GNorm = 0.7934, lr_0 = 3.0243e-04
Loss = 1.0582e-01, PNorm = 80.1411, GNorm = 0.9713, lr_0 = 3.0222e-04
Loss = 1.0389e-01, PNorm = 80.1456, GNorm = 0.5688, lr_0 = 3.0202e-04
Loss = 9.3128e-02, PNorm = 80.1505, GNorm = 0.7750, lr_0 = 3.0181e-04
Loss = 1.0648e-01, PNorm = 80.1593, GNorm = 0.6243, lr_0 = 3.0160e-04
Loss = 1.0072e-01, PNorm = 80.1654, GNorm = 0.6294, lr_0 = 3.0140e-04
Loss = 1.1949e-01, PNorm = 80.1670, GNorm = 0.6643, lr_0 = 3.0119e-04
Loss = 1.2211e-01, PNorm = 80.1673, GNorm = 0.7153, lr_0 = 3.0098e-04
Loss = 1.0363e-01, PNorm = 80.1719, GNorm = 0.6416, lr_0 = 3.0078e-04
Loss = 9.5423e-02, PNorm = 80.1801, GNorm = 0.6339, lr_0 = 3.0057e-04
Loss = 1.0531e-01, PNorm = 80.1865, GNorm = 0.8475, lr_0 = 3.0036e-04
Loss = 1.1853e-01, PNorm = 80.1913, GNorm = 0.6429, lr_0 = 3.0016e-04
Loss = 1.0165e-01, PNorm = 80.1985, GNorm = 0.4726, lr_0 = 2.9995e-04
Loss = 1.1176e-01, PNorm = 80.2058, GNorm = 0.8307, lr_0 = 2.9975e-04
Loss = 1.3328e-01, PNorm = 80.2126, GNorm = 1.4161, lr_0 = 2.9954e-04
Loss = 1.1270e-01, PNorm = 80.2247, GNorm = 0.9185, lr_0 = 2.9934e-04
Loss = 1.0330e-01, PNorm = 80.2326, GNorm = 0.4921, lr_0 = 2.9913e-04
Loss = 1.1232e-01, PNorm = 80.2378, GNorm = 0.6747, lr_0 = 2.9893e-04
Loss = 1.0801e-01, PNorm = 80.2456, GNorm = 0.5658, lr_0 = 2.9872e-04
Loss = 1.0954e-01, PNorm = 80.2492, GNorm = 0.6012, lr_0 = 2.9852e-04
Loss = 1.0540e-01, PNorm = 80.2517, GNorm = 0.5097, lr_0 = 2.9831e-04
Loss = 1.0403e-01, PNorm = 80.2566, GNorm = 0.7467, lr_0 = 2.9811e-04
Loss = 9.5664e-02, PNorm = 80.2629, GNorm = 0.6450, lr_0 = 2.9790e-04
Loss = 1.0122e-01, PNorm = 80.2712, GNorm = 0.9246, lr_0 = 2.9770e-04
Loss = 9.3846e-02, PNorm = 80.2763, GNorm = 0.5242, lr_0 = 2.9750e-04
Loss = 9.6628e-02, PNorm = 80.2799, GNorm = 0.4753, lr_0 = 2.9729e-04
Loss = 1.0918e-01, PNorm = 80.2854, GNorm = 0.8072, lr_0 = 2.9709e-04
Loss = 1.1517e-01, PNorm = 80.2949, GNorm = 0.9512, lr_0 = 2.9689e-04
Loss = 9.2272e-02, PNorm = 80.3051, GNorm = 0.6464, lr_0 = 2.9668e-04
Loss = 9.7177e-02, PNorm = 80.3129, GNorm = 0.6107, lr_0 = 2.9648e-04
Loss = 9.2655e-02, PNorm = 80.3182, GNorm = 0.6251, lr_0 = 2.9628e-04
Loss = 9.9282e-02, PNorm = 80.3238, GNorm = 0.6496, lr_0 = 2.9607e-04
Loss = 1.0641e-01, PNorm = 80.3298, GNorm = 0.7056, lr_0 = 2.9587e-04
Loss = 9.7267e-02, PNorm = 80.3350, GNorm = 0.6995, lr_0 = 2.9567e-04
Loss = 1.1069e-01, PNorm = 80.3399, GNorm = 0.7419, lr_0 = 2.9546e-04
Loss = 1.1703e-01, PNorm = 80.3475, GNorm = 0.5092, lr_0 = 2.9526e-04
Loss = 1.1115e-01, PNorm = 80.3549, GNorm = 0.8170, lr_0 = 2.9506e-04
Loss = 1.0524e-01, PNorm = 80.3588, GNorm = 0.5470, lr_0 = 2.9486e-04
Loss = 1.0252e-01, PNorm = 80.3658, GNorm = 0.7238, lr_0 = 2.9466e-04
Loss = 1.0901e-01, PNorm = 80.3741, GNorm = 0.6479, lr_0 = 2.9445e-04
Loss = 9.9630e-02, PNorm = 80.3782, GNorm = 0.8113, lr_0 = 2.9425e-04
Loss = 1.0833e-01, PNorm = 80.3797, GNorm = 0.7147, lr_0 = 2.9405e-04
Loss = 1.0576e-01, PNorm = 80.3877, GNorm = 0.6987, lr_0 = 2.9385e-04
Loss = 1.1230e-01, PNorm = 80.3973, GNorm = 0.6167, lr_0 = 2.9365e-04
Loss = 1.1088e-01, PNorm = 80.4057, GNorm = 0.4696, lr_0 = 2.9345e-04
Loss = 1.1008e-01, PNorm = 80.4123, GNorm = 0.5728, lr_0 = 2.9325e-04
Loss = 9.7115e-02, PNorm = 80.4148, GNorm = 0.6273, lr_0 = 2.9305e-04
Loss = 1.1582e-01, PNorm = 80.4190, GNorm = 0.7202, lr_0 = 2.9284e-04
Loss = 1.0315e-01, PNorm = 80.4224, GNorm = 0.6243, lr_0 = 2.9264e-04
Loss = 9.3827e-02, PNorm = 80.4270, GNorm = 0.5363, lr_0 = 2.9244e-04
Loss = 1.2631e-01, PNorm = 80.4292, GNorm = 0.6396, lr_0 = 2.9224e-04
Loss = 1.0420e-01, PNorm = 80.4384, GNorm = 0.6917, lr_0 = 2.9204e-04
Loss = 1.1208e-01, PNorm = 80.4450, GNorm = 0.7378, lr_0 = 2.9184e-04
Loss = 1.0931e-01, PNorm = 80.4525, GNorm = 1.0009, lr_0 = 2.9164e-04
Loss = 1.0674e-01, PNorm = 80.4598, GNorm = 0.8889, lr_0 = 2.9144e-04
Loss = 1.0955e-01, PNorm = 80.4665, GNorm = 0.8492, lr_0 = 2.9124e-04
Validation mae = 0.229372
Epoch 17
Loss = 1.0251e-01, PNorm = 80.4766, GNorm = 0.8285, lr_0 = 2.9104e-04
Loss = 1.0688e-01, PNorm = 80.4825, GNorm = 0.8449, lr_0 = 2.9084e-04
Loss = 9.9202e-02, PNorm = 80.4935, GNorm = 0.7865, lr_0 = 2.9065e-04
Loss = 9.5198e-02, PNorm = 80.5011, GNorm = 0.5177, lr_0 = 2.9045e-04
Loss = 1.0167e-01, PNorm = 80.5084, GNorm = 0.9702, lr_0 = 2.9025e-04
Loss = 1.0310e-01, PNorm = 80.5133, GNorm = 0.7822, lr_0 = 2.9005e-04
Loss = 8.9628e-02, PNorm = 80.5194, GNorm = 0.6672, lr_0 = 2.8985e-04
Loss = 1.0740e-01, PNorm = 80.5230, GNorm = 1.2218, lr_0 = 2.8965e-04
Loss = 9.5581e-02, PNorm = 80.5271, GNorm = 0.6009, lr_0 = 2.8945e-04
Loss = 1.0450e-01, PNorm = 80.5340, GNorm = 0.6599, lr_0 = 2.8925e-04
Loss = 8.8411e-02, PNorm = 80.5430, GNorm = 0.5518, lr_0 = 2.8906e-04
Loss = 9.5125e-02, PNorm = 80.5520, GNorm = 0.7405, lr_0 = 2.8886e-04
Loss = 8.9418e-02, PNorm = 80.5604, GNorm = 0.5359, lr_0 = 2.8866e-04
Loss = 1.0329e-01, PNorm = 80.5712, GNorm = 0.5994, lr_0 = 2.8846e-04
Loss = 9.3510e-02, PNorm = 80.5752, GNorm = 0.5791, lr_0 = 2.8826e-04
Loss = 9.7748e-02, PNorm = 80.5791, GNorm = 0.6496, lr_0 = 2.8807e-04
Loss = 9.0921e-02, PNorm = 80.5829, GNorm = 0.6836, lr_0 = 2.8787e-04
Loss = 8.6598e-02, PNorm = 80.5901, GNorm = 0.5917, lr_0 = 2.8767e-04
Loss = 9.5015e-02, PNorm = 80.5940, GNorm = 0.5534, lr_0 = 2.8748e-04
Loss = 1.0144e-01, PNorm = 80.5937, GNorm = 0.5369, lr_0 = 2.8728e-04
Loss = 1.0687e-01, PNorm = 80.5979, GNorm = 0.8418, lr_0 = 2.8708e-04
Loss = 1.0140e-01, PNorm = 80.6074, GNorm = 0.7449, lr_0 = 2.8689e-04
Loss = 9.6408e-02, PNorm = 80.6115, GNorm = 0.5798, lr_0 = 2.8669e-04
Loss = 9.6693e-02, PNorm = 80.6205, GNorm = 0.5574, lr_0 = 2.8649e-04
Loss = 1.0167e-01, PNorm = 80.6259, GNorm = 0.5482, lr_0 = 2.8630e-04
Loss = 1.0343e-01, PNorm = 80.6308, GNorm = 0.7411, lr_0 = 2.8610e-04
Loss = 9.8777e-02, PNorm = 80.6348, GNorm = 0.7371, lr_0 = 2.8590e-04
Loss = 8.9093e-02, PNorm = 80.6386, GNorm = 0.7882, lr_0 = 2.8571e-04
Loss = 9.2450e-02, PNorm = 80.6435, GNorm = 0.5208, lr_0 = 2.8551e-04
Loss = 1.1582e-01, PNorm = 80.6504, GNorm = 1.3853, lr_0 = 2.8532e-04
Loss = 9.9325e-02, PNorm = 80.6590, GNorm = 0.6681, lr_0 = 2.8512e-04
Loss = 1.2231e-01, PNorm = 80.6663, GNorm = 0.8676, lr_0 = 2.8493e-04
Loss = 1.0412e-01, PNorm = 80.6707, GNorm = 0.5590, lr_0 = 2.8473e-04
Loss = 1.0630e-01, PNorm = 80.6762, GNorm = 0.8492, lr_0 = 2.8454e-04
Loss = 1.0883e-01, PNorm = 80.6820, GNorm = 0.6964, lr_0 = 2.8434e-04
Loss = 9.4918e-02, PNorm = 80.6870, GNorm = 0.8795, lr_0 = 2.8415e-04
Loss = 1.0431e-01, PNorm = 80.6915, GNorm = 1.0467, lr_0 = 2.8395e-04
Loss = 9.9168e-02, PNorm = 80.7000, GNorm = 0.7676, lr_0 = 2.8376e-04
Loss = 1.0078e-01, PNorm = 80.7075, GNorm = 0.7478, lr_0 = 2.8356e-04
Loss = 9.4280e-02, PNorm = 80.7152, GNorm = 0.5651, lr_0 = 2.8337e-04
Loss = 8.2924e-02, PNorm = 80.7198, GNorm = 0.5227, lr_0 = 2.8317e-04
Loss = 1.0181e-01, PNorm = 80.7222, GNorm = 0.7785, lr_0 = 2.8298e-04
Loss = 8.3828e-02, PNorm = 80.7272, GNorm = 0.4748, lr_0 = 2.8279e-04
Loss = 9.9523e-02, PNorm = 80.7331, GNorm = 0.5682, lr_0 = 2.8259e-04
Loss = 9.9097e-02, PNorm = 80.7398, GNorm = 0.6325, lr_0 = 2.8240e-04
Loss = 9.5930e-02, PNorm = 80.7461, GNorm = 0.6659, lr_0 = 2.8221e-04
Loss = 9.7818e-02, PNorm = 80.7502, GNorm = 0.5527, lr_0 = 2.8201e-04
Loss = 9.8636e-02, PNorm = 80.7577, GNorm = 0.6349, lr_0 = 2.8182e-04
Loss = 9.6785e-02, PNorm = 80.7649, GNorm = 0.6267, lr_0 = 2.8163e-04
Loss = 1.0688e-01, PNorm = 80.7721, GNorm = 0.5966, lr_0 = 2.8143e-04
Loss = 1.0390e-01, PNorm = 80.7766, GNorm = 0.6429, lr_0 = 2.8124e-04
Loss = 1.0026e-01, PNorm = 80.7817, GNorm = 0.6320, lr_0 = 2.8105e-04
Loss = 1.0111e-01, PNorm = 80.7869, GNorm = 0.7000, lr_0 = 2.8085e-04
Loss = 1.1558e-01, PNorm = 80.7925, GNorm = 0.7079, lr_0 = 2.8066e-04
Loss = 9.4399e-02, PNorm = 80.7985, GNorm = 0.8717, lr_0 = 2.8047e-04
Loss = 1.0349e-01, PNorm = 80.8061, GNorm = 0.8980, lr_0 = 2.8028e-04
Loss = 8.8019e-02, PNorm = 80.8108, GNorm = 0.7623, lr_0 = 2.8009e-04
Loss = 1.0471e-01, PNorm = 80.8160, GNorm = 0.6251, lr_0 = 2.7989e-04
Loss = 1.0313e-01, PNorm = 80.8218, GNorm = 0.9535, lr_0 = 2.7970e-04
Loss = 1.0764e-01, PNorm = 80.8260, GNorm = 0.8480, lr_0 = 2.7951e-04
Loss = 1.0750e-01, PNorm = 80.8297, GNorm = 0.6685, lr_0 = 2.7932e-04
Loss = 1.0422e-01, PNorm = 80.8358, GNorm = 0.8225, lr_0 = 2.7913e-04
Loss = 1.1046e-01, PNorm = 80.8372, GNorm = 0.7181, lr_0 = 2.7894e-04
Loss = 1.1323e-01, PNorm = 80.8399, GNorm = 0.6422, lr_0 = 2.7875e-04
Loss = 1.0550e-01, PNorm = 80.8422, GNorm = 0.5899, lr_0 = 2.7855e-04
Loss = 1.1437e-01, PNorm = 80.8470, GNorm = 1.2152, lr_0 = 2.7836e-04
Loss = 9.5434e-02, PNorm = 80.8503, GNorm = 1.0292, lr_0 = 2.7817e-04
Loss = 1.0927e-01, PNorm = 80.8555, GNorm = 0.6367, lr_0 = 2.7798e-04
Loss = 1.1101e-01, PNorm = 80.8676, GNorm = 0.5699, lr_0 = 2.7779e-04
Loss = 1.0327e-01, PNorm = 80.8782, GNorm = 0.5755, lr_0 = 2.7760e-04
Loss = 8.9075e-02, PNorm = 80.8864, GNorm = 0.5361, lr_0 = 2.7741e-04
Loss = 1.0049e-01, PNorm = 80.8937, GNorm = 0.5977, lr_0 = 2.7722e-04
Loss = 1.0335e-01, PNorm = 80.9007, GNorm = 0.6318, lr_0 = 2.7703e-04
Loss = 1.0599e-01, PNorm = 80.9055, GNorm = 0.8067, lr_0 = 2.7684e-04
Loss = 1.0237e-01, PNorm = 80.9136, GNorm = 0.7873, lr_0 = 2.7665e-04
Loss = 7.9865e-02, PNorm = 80.9173, GNorm = 0.5262, lr_0 = 2.7646e-04
Loss = 1.1823e-01, PNorm = 80.9250, GNorm = 0.7642, lr_0 = 2.7627e-04
Loss = 9.6190e-02, PNorm = 80.9304, GNorm = 0.7149, lr_0 = 2.7608e-04
Loss = 9.6202e-02, PNorm = 80.9357, GNorm = 0.5729, lr_0 = 2.7590e-04
Loss = 1.0858e-01, PNorm = 80.9383, GNorm = 0.7178, lr_0 = 2.7571e-04
Loss = 1.0415e-01, PNorm = 80.9445, GNorm = 0.8426, lr_0 = 2.7552e-04
Loss = 1.0710e-01, PNorm = 80.9477, GNorm = 0.9519, lr_0 = 2.7533e-04
Loss = 1.1623e-01, PNorm = 80.9525, GNorm = 0.6982, lr_0 = 2.7514e-04
Loss = 1.0112e-01, PNorm = 80.9563, GNorm = 0.7534, lr_0 = 2.7495e-04
Loss = 1.1459e-01, PNorm = 80.9648, GNorm = 0.6210, lr_0 = 2.7476e-04
Loss = 1.0815e-01, PNorm = 80.9715, GNorm = 0.6221, lr_0 = 2.7457e-04
Loss = 1.0059e-01, PNorm = 80.9790, GNorm = 0.5780, lr_0 = 2.7439e-04
Loss = 9.4183e-02, PNorm = 80.9848, GNorm = 0.6246, lr_0 = 2.7420e-04
Loss = 1.0473e-01, PNorm = 80.9889, GNorm = 0.5092, lr_0 = 2.7401e-04
Loss = 9.6680e-02, PNorm = 80.9971, GNorm = 0.5926, lr_0 = 2.7382e-04
Loss = 1.0276e-01, PNorm = 81.0014, GNorm = 0.7004, lr_0 = 2.7364e-04
Loss = 1.1388e-01, PNorm = 81.0098, GNorm = 0.5931, lr_0 = 2.7345e-04
Loss = 9.7856e-02, PNorm = 81.0146, GNorm = 0.6453, lr_0 = 2.7326e-04
Loss = 1.1411e-01, PNorm = 81.0189, GNorm = 0.5390, lr_0 = 2.7307e-04
Loss = 1.0162e-01, PNorm = 81.0236, GNorm = 0.7626, lr_0 = 2.7289e-04
Loss = 1.1137e-01, PNorm = 81.0272, GNorm = 0.5731, lr_0 = 2.7270e-04
Loss = 9.7054e-02, PNorm = 81.0333, GNorm = 0.7965, lr_0 = 2.7251e-04
Loss = 1.0465e-01, PNorm = 81.0436, GNorm = 0.6881, lr_0 = 2.7233e-04
Loss = 9.2722e-02, PNorm = 81.0501, GNorm = 0.7114, lr_0 = 2.7214e-04
Loss = 1.0445e-01, PNorm = 81.0551, GNorm = 0.5879, lr_0 = 2.7195e-04
Loss = 1.0012e-01, PNorm = 81.0621, GNorm = 0.6789, lr_0 = 2.7177e-04
Loss = 9.5040e-02, PNorm = 81.0678, GNorm = 0.7936, lr_0 = 2.7158e-04
Loss = 1.2445e-01, PNorm = 81.0729, GNorm = 0.7070, lr_0 = 2.7139e-04
Loss = 1.0486e-01, PNorm = 81.0766, GNorm = 0.6218, lr_0 = 2.7121e-04
Loss = 1.1359e-01, PNorm = 81.0818, GNorm = 0.6180, lr_0 = 2.7102e-04
Loss = 9.3042e-02, PNorm = 81.0875, GNorm = 0.5942, lr_0 = 2.7084e-04
Loss = 1.1546e-01, PNorm = 81.0919, GNorm = 0.9743, lr_0 = 2.7065e-04
Loss = 9.9987e-02, PNorm = 81.0974, GNorm = 0.8785, lr_0 = 2.7047e-04
Loss = 1.0670e-01, PNorm = 81.1056, GNorm = 0.4899, lr_0 = 2.7028e-04
Loss = 9.9898e-02, PNorm = 81.1147, GNorm = 1.1399, lr_0 = 2.7010e-04
Loss = 1.0355e-01, PNorm = 81.1168, GNorm = 0.6919, lr_0 = 2.6991e-04
Loss = 9.4843e-02, PNorm = 81.1224, GNorm = 0.6544, lr_0 = 2.6973e-04
Loss = 1.0607e-01, PNorm = 81.1282, GNorm = 0.7549, lr_0 = 2.6954e-04
Loss = 9.9851e-02, PNorm = 81.1356, GNorm = 0.7533, lr_0 = 2.6936e-04
Loss = 1.0767e-01, PNorm = 81.1392, GNorm = 0.5653, lr_0 = 2.6917e-04
Loss = 1.1018e-01, PNorm = 81.1452, GNorm = 0.9645, lr_0 = 2.6899e-04
Loss = 1.2495e-01, PNorm = 81.1566, GNorm = 0.7367, lr_0 = 2.6880e-04
Loss = 9.6148e-02, PNorm = 81.1658, GNorm = 0.7896, lr_0 = 2.6862e-04
Loss = 1.0188e-01, PNorm = 81.1740, GNorm = 0.5920, lr_0 = 2.6844e-04
Loss = 9.8094e-02, PNorm = 81.1780, GNorm = 1.0749, lr_0 = 2.6825e-04
Validation mae = 0.228186
Epoch 18
Loss = 9.9746e-02, PNorm = 81.1839, GNorm = 0.7507, lr_0 = 2.6807e-04
Loss = 1.0140e-01, PNorm = 81.1869, GNorm = 0.6067, lr_0 = 2.6788e-04
Loss = 9.0980e-02, PNorm = 81.1933, GNorm = 0.4845, lr_0 = 2.6770e-04
Loss = 8.9402e-02, PNorm = 81.2006, GNorm = 0.7763, lr_0 = 2.6752e-04
Loss = 1.0397e-01, PNorm = 81.2047, GNorm = 0.4718, lr_0 = 2.6733e-04
Loss = 9.1393e-02, PNorm = 81.2119, GNorm = 0.6556, lr_0 = 2.6715e-04
Loss = 9.2356e-02, PNorm = 81.2187, GNorm = 0.6173, lr_0 = 2.6697e-04
Loss = 1.0847e-01, PNorm = 81.2213, GNorm = 0.7262, lr_0 = 2.6678e-04
Loss = 8.9504e-02, PNorm = 81.2273, GNorm = 0.6014, lr_0 = 2.6660e-04
Loss = 8.4313e-02, PNorm = 81.2323, GNorm = 0.5857, lr_0 = 2.6642e-04
Loss = 9.9193e-02, PNorm = 81.2356, GNorm = 0.6276, lr_0 = 2.6624e-04
Loss = 9.1773e-02, PNorm = 81.2421, GNorm = 0.5671, lr_0 = 2.6605e-04
Loss = 9.6109e-02, PNorm = 81.2486, GNorm = 0.5817, lr_0 = 2.6587e-04
Loss = 1.0344e-01, PNorm = 81.2532, GNorm = 0.5228, lr_0 = 2.6569e-04
Loss = 8.7267e-02, PNorm = 81.2592, GNorm = 0.4996, lr_0 = 2.6551e-04
Loss = 9.2811e-02, PNorm = 81.2610, GNorm = 0.6758, lr_0 = 2.6533e-04
Loss = 8.4544e-02, PNorm = 81.2633, GNorm = 0.6636, lr_0 = 2.6514e-04
Loss = 8.6865e-02, PNorm = 81.2690, GNorm = 0.7405, lr_0 = 2.6496e-04
Loss = 8.5814e-02, PNorm = 81.2734, GNorm = 0.4756, lr_0 = 2.6478e-04
Loss = 1.0155e-01, PNorm = 81.2770, GNorm = 0.8487, lr_0 = 2.6460e-04
Loss = 9.2419e-02, PNorm = 81.2833, GNorm = 0.5936, lr_0 = 2.6442e-04
Loss = 9.9031e-02, PNorm = 81.2909, GNorm = 0.6100, lr_0 = 2.6424e-04
Loss = 8.6077e-02, PNorm = 81.2968, GNorm = 0.5820, lr_0 = 2.6406e-04
Loss = 9.5969e-02, PNorm = 81.3040, GNorm = 0.9642, lr_0 = 2.6388e-04
Loss = 8.9625e-02, PNorm = 81.3052, GNorm = 0.4316, lr_0 = 2.6369e-04
Loss = 8.5703e-02, PNorm = 81.3092, GNorm = 0.5406, lr_0 = 2.6351e-04
Loss = 8.5321e-02, PNorm = 81.3167, GNorm = 0.4636, lr_0 = 2.6333e-04
Loss = 7.8788e-02, PNorm = 81.3233, GNorm = 0.6582, lr_0 = 2.6315e-04
Loss = 9.4341e-02, PNorm = 81.3273, GNorm = 0.9315, lr_0 = 2.6297e-04
Loss = 9.4554e-02, PNorm = 81.3294, GNorm = 0.9478, lr_0 = 2.6279e-04
Loss = 8.7966e-02, PNorm = 81.3339, GNorm = 0.6922, lr_0 = 2.6261e-04
Loss = 9.5169e-02, PNorm = 81.3417, GNorm = 0.6085, lr_0 = 2.6243e-04
Loss = 9.3427e-02, PNorm = 81.3469, GNorm = 0.8505, lr_0 = 2.6225e-04
Loss = 8.9538e-02, PNorm = 81.3498, GNorm = 0.5865, lr_0 = 2.6207e-04
Loss = 8.9830e-02, PNorm = 81.3544, GNorm = 0.6061, lr_0 = 2.6189e-04
Loss = 9.7518e-02, PNorm = 81.3593, GNorm = 0.7588, lr_0 = 2.6171e-04
Loss = 9.6799e-02, PNorm = 81.3627, GNorm = 0.6626, lr_0 = 2.6153e-04
Loss = 8.4363e-02, PNorm = 81.3689, GNorm = 0.5605, lr_0 = 2.6136e-04
Loss = 9.3751e-02, PNorm = 81.3755, GNorm = 0.5620, lr_0 = 2.6118e-04
Loss = 9.8231e-02, PNorm = 81.3786, GNorm = 0.6832, lr_0 = 2.6100e-04
Loss = 9.5868e-02, PNorm = 81.3844, GNorm = 0.6179, lr_0 = 2.6082e-04
Loss = 8.7825e-02, PNorm = 81.3903, GNorm = 0.7217, lr_0 = 2.6064e-04
Loss = 1.0293e-01, PNorm = 81.3951, GNorm = 0.6686, lr_0 = 2.6046e-04
Loss = 1.0370e-01, PNorm = 81.3981, GNorm = 0.4473, lr_0 = 2.6028e-04
Loss = 9.9954e-02, PNorm = 81.4040, GNorm = 0.8268, lr_0 = 2.6011e-04
Loss = 1.0928e-01, PNorm = 81.4095, GNorm = 0.8110, lr_0 = 2.5993e-04
Loss = 1.2568e-01, PNorm = 81.4149, GNorm = 1.2419, lr_0 = 2.5975e-04
Loss = 9.2927e-02, PNorm = 81.4253, GNorm = 0.6297, lr_0 = 2.5957e-04
Loss = 8.9121e-02, PNorm = 81.4285, GNorm = 0.7016, lr_0 = 2.5939e-04
Loss = 8.8564e-02, PNorm = 81.4328, GNorm = 0.5852, lr_0 = 2.5922e-04
Loss = 9.7372e-02, PNorm = 81.4369, GNorm = 0.8367, lr_0 = 2.5904e-04
Loss = 9.2647e-02, PNorm = 81.4413, GNorm = 0.5451, lr_0 = 2.5886e-04
Loss = 9.7178e-02, PNorm = 81.4460, GNorm = 1.0809, lr_0 = 2.5868e-04
Loss = 1.1056e-01, PNorm = 81.4498, GNorm = 0.6235, lr_0 = 2.5851e-04
Loss = 9.3512e-02, PNorm = 81.4567, GNorm = 0.6677, lr_0 = 2.5833e-04
Loss = 9.1571e-02, PNorm = 81.4624, GNorm = 0.7554, lr_0 = 2.5815e-04
Loss = 9.7140e-02, PNorm = 81.4635, GNorm = 0.8597, lr_0 = 2.5797e-04
Loss = 1.0123e-01, PNorm = 81.4673, GNorm = 0.6882, lr_0 = 2.5780e-04
Loss = 1.0360e-01, PNorm = 81.4732, GNorm = 0.6233, lr_0 = 2.5762e-04
Loss = 1.0442e-01, PNorm = 81.4798, GNorm = 0.6958, lr_0 = 2.5745e-04
Loss = 9.6836e-02, PNorm = 81.4863, GNorm = 0.5361, lr_0 = 2.5727e-04
Loss = 9.6927e-02, PNorm = 81.4945, GNorm = 0.7381, lr_0 = 2.5709e-04
Loss = 1.0343e-01, PNorm = 81.5040, GNorm = 0.6047, lr_0 = 2.5692e-04
Loss = 9.8074e-02, PNorm = 81.5087, GNorm = 0.5416, lr_0 = 2.5674e-04
Loss = 1.0614e-01, PNorm = 81.5122, GNorm = 1.1336, lr_0 = 2.5656e-04
Loss = 1.0226e-01, PNorm = 81.5164, GNorm = 0.6064, lr_0 = 2.5639e-04
Loss = 9.4943e-02, PNorm = 81.5220, GNorm = 0.7961, lr_0 = 2.5621e-04
Loss = 1.0936e-01, PNorm = 81.5274, GNorm = 0.7658, lr_0 = 2.5604e-04
Loss = 8.8757e-02, PNorm = 81.5311, GNorm = 0.5526, lr_0 = 2.5586e-04
Loss = 9.6064e-02, PNorm = 81.5339, GNorm = 0.8870, lr_0 = 2.5569e-04
Loss = 1.0771e-01, PNorm = 81.5397, GNorm = 0.7671, lr_0 = 2.5551e-04
Loss = 1.1180e-01, PNorm = 81.5437, GNorm = 0.9575, lr_0 = 2.5534e-04
Loss = 1.0631e-01, PNorm = 81.5497, GNorm = 0.7394, lr_0 = 2.5516e-04
Loss = 1.0176e-01, PNorm = 81.5544, GNorm = 0.7752, lr_0 = 2.5499e-04
Loss = 1.0399e-01, PNorm = 81.5591, GNorm = 0.8822, lr_0 = 2.5481e-04
Loss = 9.6599e-02, PNorm = 81.5638, GNorm = 0.6577, lr_0 = 2.5464e-04
Loss = 9.4903e-02, PNorm = 81.5692, GNorm = 0.6549, lr_0 = 2.5446e-04
Loss = 8.6576e-02, PNorm = 81.5746, GNorm = 0.6491, lr_0 = 2.5429e-04
Loss = 9.5958e-02, PNorm = 81.5792, GNorm = 0.7203, lr_0 = 2.5411e-04
Loss = 8.7809e-02, PNorm = 81.5873, GNorm = 0.5465, lr_0 = 2.5394e-04
Loss = 1.0545e-01, PNorm = 81.5944, GNorm = 0.6313, lr_0 = 2.5377e-04
Loss = 1.2238e-01, PNorm = 81.6015, GNorm = 0.6910, lr_0 = 2.5359e-04
Loss = 9.2766e-02, PNorm = 81.6080, GNorm = 0.6334, lr_0 = 2.5342e-04
Loss = 9.9424e-02, PNorm = 81.6162, GNorm = 0.5975, lr_0 = 2.5325e-04
Loss = 9.9956e-02, PNorm = 81.6209, GNorm = 0.6213, lr_0 = 2.5307e-04
Loss = 9.2911e-02, PNorm = 81.6274, GNorm = 0.6096, lr_0 = 2.5290e-04
Loss = 9.8674e-02, PNorm = 81.6337, GNorm = 0.9024, lr_0 = 2.5273e-04
Loss = 1.0341e-01, PNorm = 81.6368, GNorm = 0.6092, lr_0 = 2.5255e-04
Loss = 9.9368e-02, PNorm = 81.6426, GNorm = 0.5610, lr_0 = 2.5238e-04
Loss = 1.0725e-01, PNorm = 81.6484, GNorm = 1.0661, lr_0 = 2.5221e-04
Loss = 1.0913e-01, PNorm = 81.6556, GNorm = 0.6053, lr_0 = 2.5203e-04
Loss = 9.3183e-02, PNorm = 81.6613, GNorm = 0.7167, lr_0 = 2.5186e-04
Loss = 9.8408e-02, PNorm = 81.6670, GNorm = 0.7967, lr_0 = 2.5169e-04
Loss = 9.7849e-02, PNorm = 81.6737, GNorm = 0.8317, lr_0 = 2.5152e-04
Loss = 9.7112e-02, PNorm = 81.6784, GNorm = 0.6112, lr_0 = 2.5134e-04
Loss = 9.1497e-02, PNorm = 81.6834, GNorm = 0.6649, lr_0 = 2.5117e-04
Loss = 9.3846e-02, PNorm = 81.6868, GNorm = 0.4336, lr_0 = 2.5100e-04
Loss = 1.0094e-01, PNorm = 81.6910, GNorm = 0.5680, lr_0 = 2.5083e-04
Loss = 9.1520e-02, PNorm = 81.6972, GNorm = 0.5936, lr_0 = 2.5066e-04
Loss = 1.1053e-01, PNorm = 81.6991, GNorm = 1.1677, lr_0 = 2.5048e-04
Loss = 9.1600e-02, PNorm = 81.7016, GNorm = 0.7782, lr_0 = 2.5031e-04
Loss = 9.9717e-02, PNorm = 81.7068, GNorm = 0.6595, lr_0 = 2.5014e-04
Loss = 1.1075e-01, PNorm = 81.7127, GNorm = 0.7424, lr_0 = 2.4997e-04
Loss = 9.0690e-02, PNorm = 81.7184, GNorm = 0.7674, lr_0 = 2.4980e-04
Loss = 1.1195e-01, PNorm = 81.7226, GNorm = 0.7672, lr_0 = 2.4963e-04
Loss = 1.0176e-01, PNorm = 81.7226, GNorm = 0.7510, lr_0 = 2.4946e-04
Loss = 9.7322e-02, PNorm = 81.7286, GNorm = 0.6697, lr_0 = 2.4929e-04
Loss = 9.3957e-02, PNorm = 81.7288, GNorm = 0.6083, lr_0 = 2.4911e-04
Loss = 9.5806e-02, PNorm = 81.7309, GNorm = 0.6543, lr_0 = 2.4894e-04
Loss = 1.0164e-01, PNorm = 81.7398, GNorm = 0.7340, lr_0 = 2.4877e-04
Loss = 1.0626e-01, PNorm = 81.7453, GNorm = 0.8135, lr_0 = 2.4860e-04
Loss = 1.0034e-01, PNorm = 81.7520, GNorm = 0.7355, lr_0 = 2.4843e-04
Loss = 1.0691e-01, PNorm = 81.7557, GNorm = 0.6708, lr_0 = 2.4826e-04
Loss = 1.1583e-01, PNorm = 81.7580, GNorm = 0.7692, lr_0 = 2.4809e-04
Loss = 1.0835e-01, PNorm = 81.7641, GNorm = 0.8285, lr_0 = 2.4792e-04
Loss = 9.3195e-02, PNorm = 81.7687, GNorm = 0.5217, lr_0 = 2.4775e-04
Loss = 9.6081e-02, PNorm = 81.7728, GNorm = 0.5120, lr_0 = 2.4758e-04
Loss = 1.0910e-01, PNorm = 81.7803, GNorm = 0.8512, lr_0 = 2.4741e-04
Loss = 9.6628e-02, PNorm = 81.7814, GNorm = 0.4875, lr_0 = 2.4724e-04
Loss = 1.0009e-01, PNorm = 81.7830, GNorm = 0.4985, lr_0 = 2.4707e-04
Validation mae = 0.228926
Epoch 19
Loss = 9.5278e-02, PNorm = 81.7902, GNorm = 0.7551, lr_0 = 2.4690e-04
Loss = 9.4161e-02, PNorm = 81.7961, GNorm = 0.5430, lr_0 = 2.4674e-04
Loss = 8.1543e-02, PNorm = 81.8034, GNorm = 0.4456, lr_0 = 2.4657e-04
Loss = 9.6342e-02, PNorm = 81.8103, GNorm = 0.7796, lr_0 = 2.4640e-04
Loss = 8.2327e-02, PNorm = 81.8160, GNorm = 0.5188, lr_0 = 2.4623e-04
Loss = 8.7687e-02, PNorm = 81.8185, GNorm = 0.5767, lr_0 = 2.4606e-04
Loss = 1.0711e-01, PNorm = 81.8207, GNorm = 0.5767, lr_0 = 2.4589e-04
Loss = 8.6491e-02, PNorm = 81.8230, GNorm = 0.4680, lr_0 = 2.4572e-04
Loss = 8.4909e-02, PNorm = 81.8256, GNorm = 0.7278, lr_0 = 2.4556e-04
Loss = 8.5812e-02, PNorm = 81.8319, GNorm = 0.7517, lr_0 = 2.4539e-04
Loss = 1.0153e-01, PNorm = 81.8372, GNorm = 0.5969, lr_0 = 2.4522e-04
Loss = 1.1417e-01, PNorm = 81.8442, GNorm = 0.7133, lr_0 = 2.4505e-04
Loss = 9.7993e-02, PNorm = 81.8513, GNorm = 0.7956, lr_0 = 2.4488e-04
Loss = 8.1930e-02, PNorm = 81.8528, GNorm = 1.0449, lr_0 = 2.4472e-04
Loss = 1.0196e-01, PNorm = 81.8571, GNorm = 0.6448, lr_0 = 2.4455e-04
Loss = 9.5423e-02, PNorm = 81.8633, GNorm = 0.6700, lr_0 = 2.4438e-04
Loss = 9.6897e-02, PNorm = 81.8702, GNorm = 0.5878, lr_0 = 2.4421e-04
Loss = 9.9867e-02, PNorm = 81.8762, GNorm = 0.5121, lr_0 = 2.4405e-04
Loss = 9.4700e-02, PNorm = 81.8794, GNorm = 0.6704, lr_0 = 2.4388e-04
Loss = 8.0278e-02, PNorm = 81.8821, GNorm = 0.7940, lr_0 = 2.4371e-04
Loss = 9.0384e-02, PNorm = 81.8869, GNorm = 0.7601, lr_0 = 2.4354e-04
Loss = 8.9740e-02, PNorm = 81.8910, GNorm = 1.0458, lr_0 = 2.4338e-04
Loss = 9.6107e-02, PNorm = 81.8991, GNorm = 0.5552, lr_0 = 2.4321e-04
Loss = 8.2760e-02, PNorm = 81.9035, GNorm = 0.5943, lr_0 = 2.4304e-04
Loss = 1.0502e-01, PNorm = 81.9071, GNorm = 0.6795, lr_0 = 2.4288e-04
Loss = 9.3192e-02, PNorm = 81.9109, GNorm = 0.5438, lr_0 = 2.4271e-04
Loss = 8.8344e-02, PNorm = 81.9135, GNorm = 0.5008, lr_0 = 2.4254e-04
Loss = 9.6884e-02, PNorm = 81.9173, GNorm = 0.6303, lr_0 = 2.4238e-04
Loss = 1.0531e-01, PNorm = 81.9244, GNorm = 0.6776, lr_0 = 2.4221e-04
Loss = 9.6568e-02, PNorm = 81.9296, GNorm = 1.0935, lr_0 = 2.4205e-04
Loss = 9.7686e-02, PNorm = 81.9335, GNorm = 0.8235, lr_0 = 2.4188e-04
Loss = 9.4795e-02, PNorm = 81.9383, GNorm = 0.6145, lr_0 = 2.4171e-04
Loss = 9.9452e-02, PNorm = 81.9454, GNorm = 0.6687, lr_0 = 2.4155e-04
Loss = 1.0394e-01, PNorm = 81.9489, GNorm = 0.5879, lr_0 = 2.4138e-04
Loss = 9.0457e-02, PNorm = 81.9544, GNorm = 0.6112, lr_0 = 2.4122e-04
Loss = 1.0291e-01, PNorm = 81.9609, GNorm = 0.5484, lr_0 = 2.4105e-04
Loss = 9.8467e-02, PNorm = 81.9641, GNorm = 0.7054, lr_0 = 2.4089e-04
Loss = 9.1883e-02, PNorm = 81.9665, GNorm = 0.6711, lr_0 = 2.4072e-04
Loss = 1.0181e-01, PNorm = 81.9696, GNorm = 0.5643, lr_0 = 2.4056e-04
Loss = 8.9302e-02, PNorm = 81.9711, GNorm = 0.5324, lr_0 = 2.4039e-04
Loss = 8.6206e-02, PNorm = 81.9756, GNorm = 0.8592, lr_0 = 2.4023e-04
Loss = 1.0493e-01, PNorm = 81.9808, GNorm = 0.6753, lr_0 = 2.4006e-04
Loss = 8.6239e-02, PNorm = 81.9866, GNorm = 0.8617, lr_0 = 2.3990e-04
Loss = 1.0250e-01, PNorm = 81.9911, GNorm = 0.6261, lr_0 = 2.3974e-04
Loss = 9.8995e-02, PNorm = 81.9947, GNorm = 0.6090, lr_0 = 2.3957e-04
Loss = 8.6922e-02, PNorm = 81.9968, GNorm = 0.6473, lr_0 = 2.3941e-04
Loss = 9.1305e-02, PNorm = 81.9991, GNorm = 0.6548, lr_0 = 2.3924e-04
Loss = 9.2018e-02, PNorm = 82.0025, GNorm = 0.5341, lr_0 = 2.3908e-04
Loss = 9.4519e-02, PNorm = 82.0075, GNorm = 0.6322, lr_0 = 2.3892e-04
Loss = 9.3570e-02, PNorm = 82.0098, GNorm = 0.8920, lr_0 = 2.3875e-04
Loss = 9.3091e-02, PNorm = 82.0163, GNorm = 0.4612, lr_0 = 2.3859e-04
Loss = 9.1865e-02, PNorm = 82.0196, GNorm = 0.6170, lr_0 = 2.3842e-04
Loss = 9.1163e-02, PNorm = 82.0266, GNorm = 0.5356, lr_0 = 2.3826e-04
Loss = 8.2166e-02, PNorm = 82.0341, GNorm = 0.7303, lr_0 = 2.3810e-04
Loss = 9.7540e-02, PNorm = 82.0372, GNorm = 0.6483, lr_0 = 2.3794e-04
Loss = 1.0057e-01, PNorm = 82.0402, GNorm = 0.7587, lr_0 = 2.3777e-04
Loss = 9.6584e-02, PNorm = 82.0432, GNorm = 0.5985, lr_0 = 2.3761e-04
Loss = 9.0903e-02, PNorm = 82.0454, GNorm = 0.5934, lr_0 = 2.3745e-04
Loss = 9.6462e-02, PNorm = 82.0506, GNorm = 0.6675, lr_0 = 2.3728e-04
Loss = 9.4809e-02, PNorm = 82.0526, GNorm = 0.4820, lr_0 = 2.3712e-04
Loss = 9.6821e-02, PNorm = 82.0569, GNorm = 0.6051, lr_0 = 2.3696e-04
Loss = 9.7830e-02, PNorm = 82.0619, GNorm = 0.7336, lr_0 = 2.3680e-04
Loss = 9.3468e-02, PNorm = 82.0654, GNorm = 0.5684, lr_0 = 2.3663e-04
Loss = 8.1604e-02, PNorm = 82.0702, GNorm = 0.6986, lr_0 = 2.3647e-04
Loss = 9.4739e-02, PNorm = 82.0741, GNorm = 0.6821, lr_0 = 2.3631e-04
Loss = 8.7747e-02, PNorm = 82.0786, GNorm = 0.5668, lr_0 = 2.3615e-04
Loss = 1.0118e-01, PNorm = 82.0865, GNorm = 0.7748, lr_0 = 2.3599e-04
Loss = 9.1330e-02, PNorm = 82.0922, GNorm = 0.6384, lr_0 = 2.3582e-04
Loss = 9.5796e-02, PNorm = 82.0945, GNorm = 0.6924, lr_0 = 2.3566e-04
Loss = 1.0228e-01, PNorm = 82.1001, GNorm = 0.9172, lr_0 = 2.3550e-04
Loss = 9.5819e-02, PNorm = 82.1056, GNorm = 0.6509, lr_0 = 2.3534e-04
Loss = 8.7888e-02, PNorm = 82.1092, GNorm = 0.7035, lr_0 = 2.3518e-04
Loss = 9.7744e-02, PNorm = 82.1124, GNorm = 0.5778, lr_0 = 2.3502e-04
Loss = 9.4095e-02, PNorm = 82.1153, GNorm = 0.6030, lr_0 = 2.3486e-04
Loss = 8.4375e-02, PNorm = 82.1168, GNorm = 0.4680, lr_0 = 2.3470e-04
Loss = 9.7629e-02, PNorm = 82.1195, GNorm = 0.8218, lr_0 = 2.3454e-04
Loss = 9.5023e-02, PNorm = 82.1234, GNorm = 0.6416, lr_0 = 2.3437e-04
Loss = 1.0660e-01, PNorm = 82.1285, GNorm = 0.6191, lr_0 = 2.3421e-04
Loss = 9.6142e-02, PNorm = 82.1339, GNorm = 0.8708, lr_0 = 2.3405e-04
Loss = 9.5361e-02, PNorm = 82.1387, GNorm = 0.5303, lr_0 = 2.3389e-04
Loss = 8.6287e-02, PNorm = 82.1397, GNorm = 0.7470, lr_0 = 2.3373e-04
Loss = 9.0893e-02, PNorm = 82.1433, GNorm = 0.7026, lr_0 = 2.3357e-04
Loss = 1.0017e-01, PNorm = 82.1476, GNorm = 0.6370, lr_0 = 2.3341e-04
Loss = 9.1412e-02, PNorm = 82.1529, GNorm = 0.6395, lr_0 = 2.3325e-04
Loss = 9.7045e-02, PNorm = 82.1593, GNorm = 0.5172, lr_0 = 2.3309e-04
Loss = 8.9303e-02, PNorm = 82.1641, GNorm = 0.7869, lr_0 = 2.3293e-04
Loss = 8.9971e-02, PNorm = 82.1672, GNorm = 0.7627, lr_0 = 2.3277e-04
Loss = 1.0136e-01, PNorm = 82.1703, GNorm = 1.4177, lr_0 = 2.3261e-04
Loss = 9.1790e-02, PNorm = 82.1746, GNorm = 0.7243, lr_0 = 2.3246e-04
Loss = 1.0170e-01, PNorm = 82.1793, GNorm = 0.7822, lr_0 = 2.3230e-04
Loss = 9.2855e-02, PNorm = 82.1819, GNorm = 1.0071, lr_0 = 2.3214e-04
Loss = 8.9488e-02, PNorm = 82.1849, GNorm = 0.7945, lr_0 = 2.3198e-04
Loss = 8.9651e-02, PNorm = 82.1910, GNorm = 0.4932, lr_0 = 2.3182e-04
Loss = 9.3317e-02, PNorm = 82.1941, GNorm = 0.7124, lr_0 = 2.3166e-04
Loss = 9.2528e-02, PNorm = 82.1981, GNorm = 0.6553, lr_0 = 2.3150e-04
Loss = 8.8461e-02, PNorm = 82.2022, GNorm = 0.6761, lr_0 = 2.3134e-04
Loss = 1.0355e-01, PNorm = 82.2069, GNorm = 0.5031, lr_0 = 2.3118e-04
Loss = 1.0163e-01, PNorm = 82.2121, GNorm = 0.7914, lr_0 = 2.3103e-04
Loss = 1.1920e-01, PNorm = 82.2153, GNorm = 0.7185, lr_0 = 2.3087e-04
Loss = 1.1251e-01, PNorm = 82.2214, GNorm = 0.7216, lr_0 = 2.3071e-04
Loss = 9.4891e-02, PNorm = 82.2306, GNorm = 0.5697, lr_0 = 2.3055e-04
Loss = 9.7078e-02, PNorm = 82.2371, GNorm = 0.6648, lr_0 = 2.3039e-04
Loss = 8.6676e-02, PNorm = 82.2402, GNorm = 0.5165, lr_0 = 2.3024e-04
Loss = 1.0502e-01, PNorm = 82.2435, GNorm = 0.6792, lr_0 = 2.3008e-04
Loss = 1.0039e-01, PNorm = 82.2479, GNorm = 0.7922, lr_0 = 2.2992e-04
Loss = 8.8950e-02, PNorm = 82.2528, GNorm = 0.7184, lr_0 = 2.2976e-04
Loss = 9.7971e-02, PNorm = 82.2566, GNorm = 0.6585, lr_0 = 2.2961e-04
Loss = 8.9166e-02, PNorm = 82.2608, GNorm = 0.7898, lr_0 = 2.2945e-04
Loss = 9.0766e-02, PNorm = 82.2647, GNorm = 0.7031, lr_0 = 2.2929e-04
Loss = 8.9654e-02, PNorm = 82.2726, GNorm = 0.6398, lr_0 = 2.2913e-04
Loss = 9.9886e-02, PNorm = 82.2760, GNorm = 0.5272, lr_0 = 2.2898e-04
Loss = 1.0794e-01, PNorm = 82.2744, GNorm = 0.7557, lr_0 = 2.2882e-04
Loss = 9.2010e-02, PNorm = 82.2772, GNorm = 0.5945, lr_0 = 2.2866e-04
Loss = 9.0798e-02, PNorm = 82.2819, GNorm = 0.7380, lr_0 = 2.2851e-04
Loss = 9.7226e-02, PNorm = 82.2832, GNorm = 0.6292, lr_0 = 2.2835e-04
Loss = 9.7626e-02, PNorm = 82.2880, GNorm = 0.5458, lr_0 = 2.2819e-04
Loss = 9.8615e-02, PNorm = 82.2921, GNorm = 0.6296, lr_0 = 2.2804e-04
Loss = 1.1419e-01, PNorm = 82.2979, GNorm = 0.6614, lr_0 = 2.2788e-04
Loss = 8.8392e-02, PNorm = 82.3057, GNorm = 0.4990, lr_0 = 2.2773e-04
Loss = 1.0893e-01, PNorm = 82.3100, GNorm = 0.6320, lr_0 = 2.2757e-04
Validation mae = 0.229945
Epoch 20
Loss = 9.0962e-02, PNorm = 82.3130, GNorm = 0.7377, lr_0 = 2.2741e-04
Loss = 8.2804e-02, PNorm = 82.3149, GNorm = 0.7637, lr_0 = 2.2726e-04
Loss = 8.6680e-02, PNorm = 82.3186, GNorm = 1.0198, lr_0 = 2.2710e-04
Loss = 9.0049e-02, PNorm = 82.3235, GNorm = 1.0128, lr_0 = 2.2695e-04
Loss = 8.8768e-02, PNorm = 82.3284, GNorm = 0.5985, lr_0 = 2.2679e-04
Loss = 8.7312e-02, PNorm = 82.3321, GNorm = 0.5443, lr_0 = 2.2664e-04
Loss = 8.9981e-02, PNorm = 82.3376, GNorm = 0.6061, lr_0 = 2.2648e-04
Loss = 9.4545e-02, PNorm = 82.3410, GNorm = 0.8198, lr_0 = 2.2632e-04
Loss = 9.5686e-02, PNorm = 82.3455, GNorm = 0.7836, lr_0 = 2.2617e-04
Loss = 7.3545e-02, PNorm = 82.3511, GNorm = 0.5256, lr_0 = 2.2601e-04
Loss = 8.5075e-02, PNorm = 82.3561, GNorm = 0.5727, lr_0 = 2.2586e-04
Loss = 9.8134e-02, PNorm = 82.3606, GNorm = 0.5355, lr_0 = 2.2571e-04
Loss = 8.9700e-02, PNorm = 82.3660, GNorm = 0.5415, lr_0 = 2.2555e-04
Loss = 9.7955e-02, PNorm = 82.3699, GNorm = 0.6633, lr_0 = 2.2540e-04
Loss = 8.6632e-02, PNorm = 82.3743, GNorm = 0.6384, lr_0 = 2.2524e-04
Loss = 8.7055e-02, PNorm = 82.3783, GNorm = 0.6569, lr_0 = 2.2509e-04
Loss = 9.0554e-02, PNorm = 82.3797, GNorm = 0.6865, lr_0 = 2.2493e-04
Loss = 9.1488e-02, PNorm = 82.3831, GNorm = 0.6298, lr_0 = 2.2478e-04
Loss = 9.1834e-02, PNorm = 82.3876, GNorm = 0.7381, lr_0 = 2.2463e-04
Loss = 7.7077e-02, PNorm = 82.3899, GNorm = 0.7408, lr_0 = 2.2447e-04
Loss = 8.8876e-02, PNorm = 82.3920, GNorm = 0.6297, lr_0 = 2.2432e-04
Loss = 8.9892e-02, PNorm = 82.3945, GNorm = 0.7505, lr_0 = 2.2416e-04
Loss = 8.9869e-02, PNorm = 82.4008, GNorm = 0.8208, lr_0 = 2.2401e-04
Loss = 8.8206e-02, PNorm = 82.4040, GNorm = 0.5235, lr_0 = 2.2386e-04
Loss = 9.2980e-02, PNorm = 82.4080, GNorm = 0.5934, lr_0 = 2.2370e-04
Loss = 9.4359e-02, PNorm = 82.4135, GNorm = 0.5980, lr_0 = 2.2355e-04
Loss = 8.9648e-02, PNorm = 82.4153, GNorm = 0.5608, lr_0 = 2.2340e-04
Loss = 9.3444e-02, PNorm = 82.4191, GNorm = 0.7736, lr_0 = 2.2324e-04
Loss = 8.9762e-02, PNorm = 82.4239, GNorm = 0.5260, lr_0 = 2.2309e-04
Loss = 8.0893e-02, PNorm = 82.4301, GNorm = 0.6738, lr_0 = 2.2294e-04
Loss = 9.7110e-02, PNorm = 82.4358, GNorm = 0.8891, lr_0 = 2.2279e-04
Loss = 8.4342e-02, PNorm = 82.4403, GNorm = 0.5176, lr_0 = 2.2263e-04
Loss = 9.4836e-02, PNorm = 82.4444, GNorm = 0.8154, lr_0 = 2.2248e-04
Loss = 1.0200e-01, PNorm = 82.4498, GNorm = 0.6304, lr_0 = 2.2233e-04
Loss = 8.5288e-02, PNorm = 82.4540, GNorm = 0.5012, lr_0 = 2.2218e-04
Loss = 8.8250e-02, PNorm = 82.4550, GNorm = 0.6665, lr_0 = 2.2202e-04
Loss = 9.1038e-02, PNorm = 82.4572, GNorm = 0.5422, lr_0 = 2.2187e-04
Loss = 8.6042e-02, PNorm = 82.4616, GNorm = 0.8245, lr_0 = 2.2172e-04
Loss = 9.1458e-02, PNorm = 82.4660, GNorm = 0.6056, lr_0 = 2.2157e-04
Loss = 9.3809e-02, PNorm = 82.4699, GNorm = 0.6504, lr_0 = 2.2142e-04
Loss = 8.0083e-02, PNorm = 82.4703, GNorm = 0.5642, lr_0 = 2.2126e-04
Loss = 1.1193e-01, PNorm = 82.4767, GNorm = 0.6066, lr_0 = 2.2111e-04
Loss = 9.8610e-02, PNorm = 82.4843, GNorm = 0.6520, lr_0 = 2.2096e-04
Loss = 8.1393e-02, PNorm = 82.4894, GNorm = 0.7676, lr_0 = 2.2081e-04
Loss = 8.0716e-02, PNorm = 82.4933, GNorm = 0.5328, lr_0 = 2.2066e-04
Loss = 1.0176e-01, PNorm = 82.4976, GNorm = 0.7370, lr_0 = 2.2051e-04
Loss = 9.7348e-02, PNorm = 82.5013, GNorm = 0.7154, lr_0 = 2.2036e-04
Loss = 1.0147e-01, PNorm = 82.5039, GNorm = 0.6048, lr_0 = 2.2021e-04
Loss = 9.7147e-02, PNorm = 82.5077, GNorm = 0.6421, lr_0 = 2.2005e-04
Loss = 8.7859e-02, PNorm = 82.5119, GNorm = 0.7725, lr_0 = 2.1990e-04
Loss = 9.9396e-02, PNorm = 82.5184, GNorm = 0.8123, lr_0 = 2.1975e-04
Loss = 1.0237e-01, PNorm = 82.5266, GNorm = 0.6856, lr_0 = 2.1960e-04
Loss = 8.7298e-02, PNorm = 82.5322, GNorm = 0.8081, lr_0 = 2.1945e-04
Loss = 9.1952e-02, PNorm = 82.5382, GNorm = 0.4223, lr_0 = 2.1930e-04
Loss = 9.4905e-02, PNorm = 82.5418, GNorm = 0.6149, lr_0 = 2.1915e-04
Loss = 9.8974e-02, PNorm = 82.5459, GNorm = 0.6756, lr_0 = 2.1900e-04
Loss = 8.8028e-02, PNorm = 82.5502, GNorm = 0.4856, lr_0 = 2.1885e-04
Loss = 8.6111e-02, PNorm = 82.5501, GNorm = 0.4968, lr_0 = 2.1870e-04
Loss = 8.6135e-02, PNorm = 82.5538, GNorm = 0.6774, lr_0 = 2.1855e-04
Loss = 9.3874e-02, PNorm = 82.5593, GNorm = 0.7444, lr_0 = 2.1840e-04
Loss = 9.2408e-02, PNorm = 82.5604, GNorm = 0.7333, lr_0 = 2.1825e-04
Loss = 8.3483e-02, PNorm = 82.5621, GNorm = 0.9215, lr_0 = 2.1810e-04
Loss = 9.0186e-02, PNorm = 82.5647, GNorm = 0.5728, lr_0 = 2.1795e-04
Loss = 1.0824e-01, PNorm = 82.5662, GNorm = 0.8954, lr_0 = 2.1780e-04
Loss = 8.4700e-02, PNorm = 82.5687, GNorm = 0.6635, lr_0 = 2.1765e-04
Loss = 9.6168e-02, PNorm = 82.5722, GNorm = 0.6011, lr_0 = 2.1751e-04
Loss = 8.8149e-02, PNorm = 82.5761, GNorm = 0.6698, lr_0 = 2.1736e-04
Loss = 9.1974e-02, PNorm = 82.5780, GNorm = 0.7735, lr_0 = 2.1721e-04
Loss = 8.3115e-02, PNorm = 82.5811, GNorm = 0.6614, lr_0 = 2.1706e-04
Loss = 8.6232e-02, PNorm = 82.5852, GNorm = 0.5234, lr_0 = 2.1691e-04
Loss = 1.0185e-01, PNorm = 82.5893, GNorm = 0.9080, lr_0 = 2.1676e-04
Loss = 8.7200e-02, PNorm = 82.5935, GNorm = 0.8637, lr_0 = 2.1661e-04
Loss = 1.0202e-01, PNorm = 82.5938, GNorm = 0.6124, lr_0 = 2.1646e-04
Loss = 1.0626e-01, PNorm = 82.5978, GNorm = 0.7766, lr_0 = 2.1632e-04
Loss = 1.0344e-01, PNorm = 82.6016, GNorm = 0.6880, lr_0 = 2.1617e-04
Loss = 9.4787e-02, PNorm = 82.6060, GNorm = 0.8517, lr_0 = 2.1602e-04
Loss = 9.2819e-02, PNorm = 82.6091, GNorm = 0.8364, lr_0 = 2.1587e-04
Loss = 1.0790e-01, PNorm = 82.6139, GNorm = 0.9746, lr_0 = 2.1572e-04
Loss = 9.1454e-02, PNorm = 82.6201, GNorm = 0.6261, lr_0 = 2.1558e-04
Loss = 8.0506e-02, PNorm = 82.6247, GNorm = 0.7119, lr_0 = 2.1543e-04
Loss = 9.6534e-02, PNorm = 82.6283, GNorm = 0.7377, lr_0 = 2.1528e-04
Loss = 8.7414e-02, PNorm = 82.6326, GNorm = 0.6193, lr_0 = 2.1513e-04
Loss = 9.0222e-02, PNorm = 82.6339, GNorm = 0.7019, lr_0 = 2.1499e-04
Loss = 1.1044e-01, PNorm = 82.6363, GNorm = 0.8961, lr_0 = 2.1484e-04
Loss = 8.2940e-02, PNorm = 82.6398, GNorm = 0.4869, lr_0 = 2.1469e-04
Loss = 7.9833e-02, PNorm = 82.6407, GNorm = 0.5652, lr_0 = 2.1454e-04
Loss = 9.8612e-02, PNorm = 82.6444, GNorm = 0.5696, lr_0 = 2.1440e-04
Loss = 9.7205e-02, PNorm = 82.6470, GNorm = 0.7185, lr_0 = 2.1425e-04
Loss = 9.1173e-02, PNorm = 82.6508, GNorm = 0.6262, lr_0 = 2.1410e-04
Loss = 1.0803e-01, PNorm = 82.6559, GNorm = 0.8702, lr_0 = 2.1396e-04
Loss = 7.8565e-02, PNorm = 82.6599, GNorm = 0.4661, lr_0 = 2.1381e-04
Loss = 9.7371e-02, PNorm = 82.6640, GNorm = 0.6021, lr_0 = 2.1366e-04
Loss = 8.5205e-02, PNorm = 82.6665, GNorm = 0.6292, lr_0 = 2.1352e-04
Loss = 9.8867e-02, PNorm = 82.6684, GNorm = 0.7668, lr_0 = 2.1337e-04
Loss = 9.2047e-02, PNorm = 82.6723, GNorm = 0.7936, lr_0 = 2.1323e-04
Loss = 9.5536e-02, PNorm = 82.6741, GNorm = 0.5018, lr_0 = 2.1308e-04
Loss = 9.5487e-02, PNorm = 82.6777, GNorm = 0.8138, lr_0 = 2.1293e-04
Loss = 9.2554e-02, PNorm = 82.6829, GNorm = 0.6623, lr_0 = 2.1279e-04
Loss = 1.0419e-01, PNorm = 82.6870, GNorm = 0.7056, lr_0 = 2.1264e-04
Loss = 9.8092e-02, PNorm = 82.6901, GNorm = 0.7681, lr_0 = 2.1250e-04
Loss = 9.0583e-02, PNorm = 82.6918, GNorm = 0.7477, lr_0 = 2.1235e-04
Loss = 8.9076e-02, PNorm = 82.6954, GNorm = 0.4532, lr_0 = 2.1221e-04
Loss = 1.0313e-01, PNorm = 82.7024, GNorm = 0.6292, lr_0 = 2.1206e-04
Loss = 8.6838e-02, PNorm = 82.7065, GNorm = 0.8258, lr_0 = 2.1191e-04
Loss = 1.0404e-01, PNorm = 82.7102, GNorm = 0.6885, lr_0 = 2.1177e-04
Loss = 9.6698e-02, PNorm = 82.7139, GNorm = 0.9165, lr_0 = 2.1162e-04
Loss = 9.8866e-02, PNorm = 82.7177, GNorm = 0.7933, lr_0 = 2.1148e-04
Loss = 9.0460e-02, PNorm = 82.7213, GNorm = 0.5234, lr_0 = 2.1133e-04
Loss = 8.6966e-02, PNorm = 82.7276, GNorm = 0.8619, lr_0 = 2.1119e-04
Loss = 8.8141e-02, PNorm = 82.7316, GNorm = 0.8450, lr_0 = 2.1104e-04
Loss = 9.7044e-02, PNorm = 82.7336, GNorm = 0.6568, lr_0 = 2.1090e-04
Loss = 9.5269e-02, PNorm = 82.7361, GNorm = 0.7069, lr_0 = 2.1076e-04
Loss = 9.5425e-02, PNorm = 82.7394, GNorm = 0.5655, lr_0 = 2.1061e-04
Loss = 9.9697e-02, PNorm = 82.7427, GNorm = 0.7160, lr_0 = 2.1047e-04
Loss = 1.0353e-01, PNorm = 82.7457, GNorm = 0.8069, lr_0 = 2.1032e-04
Loss = 1.0080e-01, PNorm = 82.7494, GNorm = 0.4647, lr_0 = 2.1018e-04
Loss = 8.5122e-02, PNorm = 82.7536, GNorm = 0.5557, lr_0 = 2.1003e-04
Loss = 9.7217e-02, PNorm = 82.7567, GNorm = 0.6692, lr_0 = 2.0989e-04
Loss = 9.7589e-02, PNorm = 82.7618, GNorm = 0.6743, lr_0 = 2.0975e-04
Loss = 8.5514e-02, PNorm = 82.7674, GNorm = 0.8653, lr_0 = 2.0960e-04
Validation mae = 0.228838
Epoch 21
Loss = 8.6503e-02, PNorm = 82.7719, GNorm = 0.6187, lr_0 = 2.0946e-04
Loss = 8.4067e-02, PNorm = 82.7775, GNorm = 0.8021, lr_0 = 2.0932e-04
Loss = 8.6338e-02, PNorm = 82.7789, GNorm = 0.6682, lr_0 = 2.0917e-04
Loss = 8.4536e-02, PNorm = 82.7793, GNorm = 0.7468, lr_0 = 2.0903e-04
Loss = 9.4691e-02, PNorm = 82.7841, GNorm = 0.7157, lr_0 = 2.0889e-04
Loss = 9.0940e-02, PNorm = 82.7881, GNorm = 0.7396, lr_0 = 2.0874e-04
Loss = 8.9546e-02, PNorm = 82.7924, GNorm = 0.8716, lr_0 = 2.0860e-04
Loss = 9.1986e-02, PNorm = 82.7980, GNorm = 0.6606, lr_0 = 2.0846e-04
Loss = 9.0471e-02, PNorm = 82.8021, GNorm = 0.5813, lr_0 = 2.0831e-04
Loss = 7.9468e-02, PNorm = 82.8052, GNorm = 0.6233, lr_0 = 2.0817e-04
Loss = 8.3579e-02, PNorm = 82.8068, GNorm = 0.6067, lr_0 = 2.0803e-04
Loss = 8.7259e-02, PNorm = 82.8078, GNorm = 0.8520, lr_0 = 2.0789e-04
Loss = 8.3431e-02, PNorm = 82.8105, GNorm = 0.6888, lr_0 = 2.0774e-04
Loss = 7.7575e-02, PNorm = 82.8140, GNorm = 0.6296, lr_0 = 2.0760e-04
Loss = 9.0687e-02, PNorm = 82.8172, GNorm = 0.6464, lr_0 = 2.0746e-04
Loss = 8.0408e-02, PNorm = 82.8253, GNorm = 0.7187, lr_0 = 2.0732e-04
Loss = 8.1547e-02, PNorm = 82.8315, GNorm = 0.5759, lr_0 = 2.0718e-04
Loss = 8.9759e-02, PNorm = 82.8344, GNorm = 0.9625, lr_0 = 2.0703e-04
Loss = 8.8970e-02, PNorm = 82.8405, GNorm = 0.5027, lr_0 = 2.0689e-04
Loss = 8.1456e-02, PNorm = 82.8460, GNorm = 0.4281, lr_0 = 2.0675e-04
Loss = 9.3843e-02, PNorm = 82.8509, GNorm = 0.6918, lr_0 = 2.0661e-04
Loss = 8.6628e-02, PNorm = 82.8551, GNorm = 0.6945, lr_0 = 2.0647e-04
Loss = 8.6876e-02, PNorm = 82.8597, GNorm = 0.6812, lr_0 = 2.0633e-04
Loss = 8.2146e-02, PNorm = 82.8633, GNorm = 0.7764, lr_0 = 2.0618e-04
Loss = 7.5864e-02, PNorm = 82.8660, GNorm = 0.6928, lr_0 = 2.0604e-04
Loss = 8.9635e-02, PNorm = 82.8677, GNorm = 1.1908, lr_0 = 2.0590e-04
Loss = 9.4720e-02, PNorm = 82.8696, GNorm = 0.6373, lr_0 = 2.0576e-04
Loss = 8.0575e-02, PNorm = 82.8720, GNorm = 0.5766, lr_0 = 2.0562e-04
Loss = 7.5645e-02, PNorm = 82.8719, GNorm = 0.6236, lr_0 = 2.0548e-04
Loss = 8.8579e-02, PNorm = 82.8750, GNorm = 0.8103, lr_0 = 2.0534e-04
Loss = 9.2033e-02, PNorm = 82.8783, GNorm = 0.6027, lr_0 = 2.0520e-04
Loss = 8.5698e-02, PNorm = 82.8809, GNorm = 0.4669, lr_0 = 2.0506e-04
Loss = 8.9039e-02, PNorm = 82.8871, GNorm = 0.6702, lr_0 = 2.0492e-04
Loss = 9.8604e-02, PNorm = 82.8915, GNorm = 0.6560, lr_0 = 2.0478e-04
Loss = 9.1904e-02, PNorm = 82.8941, GNorm = 0.5580, lr_0 = 2.0464e-04
Loss = 8.6794e-02, PNorm = 82.8968, GNorm = 0.6799, lr_0 = 2.0450e-04
Loss = 8.9444e-02, PNorm = 82.9004, GNorm = 0.5900, lr_0 = 2.0436e-04
Loss = 9.3170e-02, PNorm = 82.9031, GNorm = 0.7888, lr_0 = 2.0422e-04
Loss = 9.0675e-02, PNorm = 82.9063, GNorm = 0.5991, lr_0 = 2.0408e-04
Loss = 8.7582e-02, PNorm = 82.9076, GNorm = 0.6608, lr_0 = 2.0394e-04
Loss = 8.9028e-02, PNorm = 82.9118, GNorm = 0.6475, lr_0 = 2.0380e-04
Loss = 9.2667e-02, PNorm = 82.9184, GNorm = 0.6470, lr_0 = 2.0366e-04
Loss = 9.7764e-02, PNorm = 82.9228, GNorm = 0.6685, lr_0 = 2.0352e-04
Loss = 8.2799e-02, PNorm = 82.9256, GNorm = 0.6105, lr_0 = 2.0338e-04
Loss = 8.4813e-02, PNorm = 82.9277, GNorm = 0.6000, lr_0 = 2.0324e-04
Loss = 8.4784e-02, PNorm = 82.9326, GNorm = 0.5518, lr_0 = 2.0310e-04
Loss = 9.4713e-02, PNorm = 82.9319, GNorm = 0.6194, lr_0 = 2.0296e-04
Loss = 8.8581e-02, PNorm = 82.9343, GNorm = 0.9058, lr_0 = 2.0282e-04
Loss = 7.5043e-02, PNorm = 82.9384, GNorm = 0.6227, lr_0 = 2.0268e-04
Loss = 8.9259e-02, PNorm = 82.9420, GNorm = 0.6282, lr_0 = 2.0254e-04
Loss = 9.6301e-02, PNorm = 82.9457, GNorm = 0.5952, lr_0 = 2.0240e-04
Loss = 7.8156e-02, PNorm = 82.9501, GNorm = 0.6731, lr_0 = 2.0227e-04
Loss = 8.7868e-02, PNorm = 82.9549, GNorm = 0.5708, lr_0 = 2.0213e-04
Loss = 9.7322e-02, PNorm = 82.9595, GNorm = 0.7208, lr_0 = 2.0199e-04
Loss = 8.7767e-02, PNorm = 82.9613, GNorm = 0.5540, lr_0 = 2.0185e-04
Loss = 8.0579e-02, PNorm = 82.9636, GNorm = 0.4988, lr_0 = 2.0171e-04
Loss = 9.3779e-02, PNorm = 82.9644, GNorm = 0.4643, lr_0 = 2.0157e-04
Loss = 9.0340e-02, PNorm = 82.9687, GNorm = 0.7340, lr_0 = 2.0144e-04
Loss = 8.3689e-02, PNorm = 82.9733, GNorm = 0.6600, lr_0 = 2.0130e-04
Loss = 8.9312e-02, PNorm = 82.9779, GNorm = 0.7507, lr_0 = 2.0116e-04
Loss = 8.2385e-02, PNorm = 82.9808, GNorm = 0.6474, lr_0 = 2.0102e-04
Loss = 8.4561e-02, PNorm = 82.9829, GNorm = 0.7873, lr_0 = 2.0088e-04
Loss = 9.1089e-02, PNorm = 82.9857, GNorm = 0.5643, lr_0 = 2.0075e-04
Loss = 8.2976e-02, PNorm = 82.9894, GNorm = 0.6258, lr_0 = 2.0061e-04
Loss = 9.4689e-02, PNorm = 82.9936, GNorm = 0.6340, lr_0 = 2.0047e-04
Loss = 8.8773e-02, PNorm = 82.9977, GNorm = 0.4971, lr_0 = 2.0033e-04
Loss = 1.0648e-01, PNorm = 83.0012, GNorm = 0.6171, lr_0 = 2.0020e-04
Loss = 8.5722e-02, PNorm = 83.0078, GNorm = 1.0092, lr_0 = 2.0006e-04
Loss = 9.1585e-02, PNorm = 83.0105, GNorm = 0.6889, lr_0 = 1.9992e-04
Loss = 8.0816e-02, PNorm = 83.0139, GNorm = 0.8040, lr_0 = 1.9979e-04
Loss = 8.6590e-02, PNorm = 83.0194, GNorm = 0.5763, lr_0 = 1.9965e-04
Loss = 8.3555e-02, PNorm = 83.0226, GNorm = 0.5180, lr_0 = 1.9951e-04
Loss = 9.2956e-02, PNorm = 83.0260, GNorm = 0.7922, lr_0 = 1.9938e-04
Loss = 1.0660e-01, PNorm = 83.0279, GNorm = 0.7658, lr_0 = 1.9924e-04
Loss = 8.1112e-02, PNorm = 83.0316, GNorm = 0.6743, lr_0 = 1.9910e-04
Loss = 8.6135e-02, PNorm = 83.0343, GNorm = 0.7053, lr_0 = 1.9897e-04
Loss = 8.6535e-02, PNorm = 83.0380, GNorm = 0.5432, lr_0 = 1.9883e-04
Loss = 1.0106e-01, PNorm = 83.0427, GNorm = 0.7347, lr_0 = 1.9869e-04
Loss = 7.7359e-02, PNorm = 83.0465, GNorm = 0.9025, lr_0 = 1.9856e-04
Loss = 7.9062e-02, PNorm = 83.0479, GNorm = 0.7129, lr_0 = 1.9842e-04
Loss = 9.1509e-02, PNorm = 83.0522, GNorm = 0.7029, lr_0 = 1.9829e-04
Loss = 9.4452e-02, PNorm = 83.0565, GNorm = 0.6827, lr_0 = 1.9815e-04
Loss = 1.0840e-01, PNorm = 83.0619, GNorm = 0.7463, lr_0 = 1.9801e-04
Loss = 8.5281e-02, PNorm = 83.0647, GNorm = 0.7017, lr_0 = 1.9788e-04
Loss = 9.0978e-02, PNorm = 83.0670, GNorm = 0.6231, lr_0 = 1.9774e-04
Loss = 8.2868e-02, PNorm = 83.0704, GNorm = 0.5367, lr_0 = 1.9761e-04
Loss = 8.8650e-02, PNorm = 83.0742, GNorm = 0.6654, lr_0 = 1.9747e-04
Loss = 9.9331e-02, PNorm = 83.0776, GNorm = 0.6803, lr_0 = 1.9734e-04
Loss = 7.7078e-02, PNorm = 83.0801, GNorm = 0.4653, lr_0 = 1.9720e-04
Loss = 9.6685e-02, PNorm = 83.0827, GNorm = 0.8642, lr_0 = 1.9707e-04
Loss = 8.2106e-02, PNorm = 83.0876, GNorm = 0.7293, lr_0 = 1.9693e-04
Loss = 9.8074e-02, PNorm = 83.0917, GNorm = 0.7830, lr_0 = 1.9680e-04
Loss = 7.8857e-02, PNorm = 83.0944, GNorm = 0.5374, lr_0 = 1.9666e-04
Loss = 8.8602e-02, PNorm = 83.0980, GNorm = 0.6106, lr_0 = 1.9653e-04
Loss = 1.0113e-01, PNorm = 83.1039, GNorm = 0.8246, lr_0 = 1.9639e-04
Loss = 9.7732e-02, PNorm = 83.1088, GNorm = 0.6802, lr_0 = 1.9626e-04
Loss = 8.9920e-02, PNorm = 83.1114, GNorm = 0.4783, lr_0 = 1.9612e-04
Loss = 8.7091e-02, PNorm = 83.1128, GNorm = 0.4450, lr_0 = 1.9599e-04
Loss = 9.9738e-02, PNorm = 83.1163, GNorm = 0.5502, lr_0 = 1.9585e-04
Loss = 9.2698e-02, PNorm = 83.1215, GNorm = 0.6806, lr_0 = 1.9572e-04
Loss = 9.1354e-02, PNorm = 83.1243, GNorm = 1.0763, lr_0 = 1.9559e-04
Loss = 7.7792e-02, PNorm = 83.1288, GNorm = 0.5190, lr_0 = 1.9545e-04
Loss = 9.9935e-02, PNorm = 83.1322, GNorm = 0.6962, lr_0 = 1.9532e-04
Loss = 9.3877e-02, PNorm = 83.1374, GNorm = 0.7666, lr_0 = 1.9518e-04
Loss = 8.2990e-02, PNorm = 83.1392, GNorm = 0.5639, lr_0 = 1.9505e-04
Loss = 8.0568e-02, PNorm = 83.1408, GNorm = 1.0019, lr_0 = 1.9492e-04
Loss = 1.1706e-01, PNorm = 83.1439, GNorm = 0.7554, lr_0 = 1.9478e-04
Loss = 1.1022e-01, PNorm = 83.1483, GNorm = 0.9464, lr_0 = 1.9465e-04
Loss = 8.7698e-02, PNorm = 83.1511, GNorm = 0.7798, lr_0 = 1.9452e-04
Loss = 8.8588e-02, PNorm = 83.1549, GNorm = 0.6184, lr_0 = 1.9438e-04
Loss = 1.0046e-01, PNorm = 83.1575, GNorm = 0.7089, lr_0 = 1.9425e-04
Loss = 8.8994e-02, PNorm = 83.1611, GNorm = 0.5931, lr_0 = 1.9412e-04
Loss = 8.6188e-02, PNorm = 83.1635, GNorm = 0.5446, lr_0 = 1.9398e-04
Loss = 8.8460e-02, PNorm = 83.1662, GNorm = 0.5655, lr_0 = 1.9385e-04
Loss = 1.0136e-01, PNorm = 83.1670, GNorm = 0.5403, lr_0 = 1.9372e-04
Loss = 8.8877e-02, PNorm = 83.1696, GNorm = 0.6056, lr_0 = 1.9359e-04
Loss = 8.3641e-02, PNorm = 83.1725, GNorm = 0.5333, lr_0 = 1.9345e-04
Loss = 8.9825e-02, PNorm = 83.1752, GNorm = 0.5909, lr_0 = 1.9332e-04
Loss = 1.0818e-01, PNorm = 83.1778, GNorm = 0.6736, lr_0 = 1.9319e-04
Loss = 8.5512e-02, PNorm = 83.1804, GNorm = 0.6394, lr_0 = 1.9306e-04
Validation mae = 0.230701
Epoch 22
Loss = 8.5096e-02, PNorm = 83.1866, GNorm = 0.6441, lr_0 = 1.9292e-04
Loss = 7.8465e-02, PNorm = 83.1909, GNorm = 0.6049, lr_0 = 1.9279e-04
Loss = 7.5389e-02, PNorm = 83.1947, GNorm = 0.4838, lr_0 = 1.9266e-04
Loss = 8.5419e-02, PNorm = 83.1975, GNorm = 0.5378, lr_0 = 1.9253e-04
Loss = 8.6015e-02, PNorm = 83.1997, GNorm = 0.6065, lr_0 = 1.9240e-04
Loss = 8.0787e-02, PNorm = 83.2025, GNorm = 0.7205, lr_0 = 1.9226e-04
Loss = 8.3950e-02, PNorm = 83.2047, GNorm = 0.5599, lr_0 = 1.9213e-04
Loss = 7.8608e-02, PNorm = 83.2053, GNorm = 0.5830, lr_0 = 1.9200e-04
Loss = 7.9781e-02, PNorm = 83.2072, GNorm = 0.5111, lr_0 = 1.9187e-04
Loss = 8.6160e-02, PNorm = 83.2119, GNorm = 0.8676, lr_0 = 1.9174e-04
Loss = 8.6253e-02, PNorm = 83.2163, GNorm = 0.6391, lr_0 = 1.9161e-04
Loss = 9.4400e-02, PNorm = 83.2228, GNorm = 0.5072, lr_0 = 1.9148e-04
Loss = 8.7336e-02, PNorm = 83.2276, GNorm = 0.7421, lr_0 = 1.9134e-04
Loss = 8.0304e-02, PNorm = 83.2294, GNorm = 0.6108, lr_0 = 1.9121e-04
Loss = 7.9872e-02, PNorm = 83.2314, GNorm = 0.4326, lr_0 = 1.9108e-04
Loss = 9.0616e-02, PNorm = 83.2331, GNorm = 0.7357, lr_0 = 1.9095e-04
Loss = 9.5656e-02, PNorm = 83.2334, GNorm = 0.7424, lr_0 = 1.9082e-04
Loss = 7.9373e-02, PNorm = 83.2353, GNorm = 0.7094, lr_0 = 1.9069e-04
Loss = 9.2313e-02, PNorm = 83.2393, GNorm = 0.7691, lr_0 = 1.9056e-04
Loss = 7.4764e-02, PNorm = 83.2444, GNorm = 0.4954, lr_0 = 1.9043e-04
Loss = 7.8085e-02, PNorm = 83.2479, GNorm = 0.5212, lr_0 = 1.9030e-04
Loss = 7.6944e-02, PNorm = 83.2487, GNorm = 0.5711, lr_0 = 1.9017e-04
Loss = 8.0390e-02, PNorm = 83.2533, GNorm = 0.5939, lr_0 = 1.9004e-04
Loss = 8.6822e-02, PNorm = 83.2577, GNorm = 0.4712, lr_0 = 1.8991e-04
Loss = 8.6797e-02, PNorm = 83.2621, GNorm = 0.7037, lr_0 = 1.8978e-04
Loss = 6.6720e-02, PNorm = 83.2649, GNorm = 0.5849, lr_0 = 1.8965e-04
Loss = 9.2602e-02, PNorm = 83.2651, GNorm = 0.6998, lr_0 = 1.8952e-04
Loss = 8.1053e-02, PNorm = 83.2668, GNorm = 0.6467, lr_0 = 1.8939e-04
Loss = 8.0007e-02, PNorm = 83.2704, GNorm = 0.6025, lr_0 = 1.8926e-04
Loss = 8.2635e-02, PNorm = 83.2740, GNorm = 0.7212, lr_0 = 1.8913e-04
Loss = 8.2539e-02, PNorm = 83.2787, GNorm = 0.5885, lr_0 = 1.8900e-04
Loss = 7.8021e-02, PNorm = 83.2820, GNorm = 0.5620, lr_0 = 1.8887e-04
Loss = 8.2480e-02, PNorm = 83.2834, GNorm = 0.8425, lr_0 = 1.8874e-04
Loss = 9.3425e-02, PNorm = 83.2846, GNorm = 0.6636, lr_0 = 1.8861e-04
Loss = 9.1635e-02, PNorm = 83.2877, GNorm = 0.6024, lr_0 = 1.8848e-04
Loss = 7.6170e-02, PNorm = 83.2919, GNorm = 0.5319, lr_0 = 1.8835e-04
Loss = 9.2020e-02, PNorm = 83.2953, GNorm = 0.6796, lr_0 = 1.8822e-04
Loss = 7.5437e-02, PNorm = 83.2979, GNorm = 0.7933, lr_0 = 1.8809e-04
Loss = 8.9104e-02, PNorm = 83.2993, GNorm = 0.6801, lr_0 = 1.8797e-04
Loss = 9.2670e-02, PNorm = 83.3029, GNorm = 0.7286, lr_0 = 1.8784e-04
Loss = 8.3826e-02, PNorm = 83.3081, GNorm = 0.6636, lr_0 = 1.8771e-04
Loss = 8.9319e-02, PNorm = 83.3120, GNorm = 0.8154, lr_0 = 1.8758e-04
Loss = 9.0965e-02, PNorm = 83.3163, GNorm = 0.6126, lr_0 = 1.8745e-04
Loss = 7.8008e-02, PNorm = 83.3216, GNorm = 0.6883, lr_0 = 1.8732e-04
Loss = 7.8008e-02, PNorm = 83.3235, GNorm = 0.6234, lr_0 = 1.8719e-04
Loss = 8.7342e-02, PNorm = 83.3254, GNorm = 0.6400, lr_0 = 1.8707e-04
Loss = 8.9629e-02, PNorm = 83.3320, GNorm = 0.7949, lr_0 = 1.8694e-04
Loss = 8.2469e-02, PNorm = 83.3341, GNorm = 0.6872, lr_0 = 1.8681e-04
Loss = 1.0382e-01, PNorm = 83.3370, GNorm = 0.6881, lr_0 = 1.8668e-04
Loss = 8.5355e-02, PNorm = 83.3407, GNorm = 0.6708, lr_0 = 1.8655e-04
Loss = 8.1716e-02, PNorm = 83.3435, GNorm = 0.6695, lr_0 = 1.8643e-04
Loss = 9.9690e-02, PNorm = 83.3454, GNorm = 0.4891, lr_0 = 1.8630e-04
Loss = 8.0673e-02, PNorm = 83.3470, GNorm = 0.5609, lr_0 = 1.8617e-04
Loss = 8.2949e-02, PNorm = 83.3480, GNorm = 0.6276, lr_0 = 1.8604e-04
Loss = 8.1107e-02, PNorm = 83.3503, GNorm = 0.6211, lr_0 = 1.8592e-04
Loss = 9.2942e-02, PNorm = 83.3549, GNorm = 0.6975, lr_0 = 1.8579e-04
Loss = 9.1616e-02, PNorm = 83.3598, GNorm = 0.5997, lr_0 = 1.8566e-04
Loss = 8.6092e-02, PNorm = 83.3627, GNorm = 0.6795, lr_0 = 1.8553e-04
Loss = 9.1017e-02, PNorm = 83.3649, GNorm = 0.6604, lr_0 = 1.8541e-04
Loss = 7.9842e-02, PNorm = 83.3688, GNorm = 0.5007, lr_0 = 1.8528e-04
Loss = 7.5564e-02, PNorm = 83.3721, GNorm = 0.5994, lr_0 = 1.8515e-04
Loss = 8.8542e-02, PNorm = 83.3768, GNorm = 0.6226, lr_0 = 1.8503e-04
Loss = 8.5862e-02, PNorm = 83.3807, GNorm = 0.5937, lr_0 = 1.8490e-04
Loss = 7.7249e-02, PNorm = 83.3819, GNorm = 0.5724, lr_0 = 1.8477e-04
Loss = 8.6665e-02, PNorm = 83.3837, GNorm = 0.6684, lr_0 = 1.8465e-04
Loss = 7.8523e-02, PNorm = 83.3870, GNorm = 0.6866, lr_0 = 1.8452e-04
Loss = 8.5428e-02, PNorm = 83.3882, GNorm = 0.4983, lr_0 = 1.8439e-04
Loss = 9.1676e-02, PNorm = 83.3923, GNorm = 0.6019, lr_0 = 1.8427e-04
Loss = 8.2715e-02, PNorm = 83.3968, GNorm = 0.6137, lr_0 = 1.8414e-04
Loss = 8.6848e-02, PNorm = 83.3980, GNorm = 0.5491, lr_0 = 1.8401e-04
Loss = 8.1276e-02, PNorm = 83.3983, GNorm = 0.6128, lr_0 = 1.8389e-04
Loss = 8.3631e-02, PNorm = 83.3996, GNorm = 0.5363, lr_0 = 1.8376e-04
Loss = 8.7255e-02, PNorm = 83.4006, GNorm = 0.7206, lr_0 = 1.8364e-04
Loss = 8.3537e-02, PNorm = 83.4018, GNorm = 0.5247, lr_0 = 1.8351e-04
Loss = 8.0041e-02, PNorm = 83.4052, GNorm = 0.6005, lr_0 = 1.8338e-04
Loss = 8.7455e-02, PNorm = 83.4074, GNorm = 0.8794, lr_0 = 1.8326e-04
Loss = 7.8547e-02, PNorm = 83.4081, GNorm = 0.6325, lr_0 = 1.8313e-04
Loss = 8.8273e-02, PNorm = 83.4092, GNorm = 0.6373, lr_0 = 1.8301e-04
Loss = 8.4670e-02, PNorm = 83.4126, GNorm = 0.6610, lr_0 = 1.8288e-04
Loss = 7.8137e-02, PNorm = 83.4139, GNorm = 0.5460, lr_0 = 1.8276e-04
Loss = 8.6618e-02, PNorm = 83.4160, GNorm = 0.6688, lr_0 = 1.8263e-04
Loss = 9.2288e-02, PNorm = 83.4167, GNorm = 0.8898, lr_0 = 1.8251e-04
Loss = 8.0334e-02, PNorm = 83.4178, GNorm = 0.5571, lr_0 = 1.8238e-04
Loss = 9.9356e-02, PNorm = 83.4208, GNorm = 0.5351, lr_0 = 1.8226e-04
Loss = 8.6622e-02, PNorm = 83.4250, GNorm = 0.8147, lr_0 = 1.8213e-04
Loss = 8.4345e-02, PNorm = 83.4274, GNorm = 0.8797, lr_0 = 1.8201e-04
Loss = 9.0604e-02, PNorm = 83.4304, GNorm = 0.7300, lr_0 = 1.8188e-04
Loss = 8.7677e-02, PNorm = 83.4340, GNorm = 0.5938, lr_0 = 1.8176e-04
Loss = 9.3312e-02, PNorm = 83.4357, GNorm = 0.6955, lr_0 = 1.8163e-04
Loss = 8.8929e-02, PNorm = 83.4375, GNorm = 0.5640, lr_0 = 1.8151e-04
Loss = 9.0042e-02, PNorm = 83.4408, GNorm = 0.8875, lr_0 = 1.8138e-04
Loss = 1.0992e-01, PNorm = 83.4441, GNorm = 0.6330, lr_0 = 1.8126e-04
Loss = 9.0958e-02, PNorm = 83.4479, GNorm = 0.5580, lr_0 = 1.8114e-04
Loss = 9.2070e-02, PNorm = 83.4515, GNorm = 0.6327, lr_0 = 1.8101e-04
Loss = 8.9439e-02, PNorm = 83.4529, GNorm = 0.7877, lr_0 = 1.8089e-04
Loss = 8.9823e-02, PNorm = 83.4556, GNorm = 0.7871, lr_0 = 1.8076e-04
Loss = 8.7179e-02, PNorm = 83.4557, GNorm = 0.5312, lr_0 = 1.8064e-04
Loss = 9.5146e-02, PNorm = 83.4564, GNorm = 0.5436, lr_0 = 1.8052e-04
Loss = 8.9138e-02, PNorm = 83.4612, GNorm = 0.8563, lr_0 = 1.8039e-04
Loss = 8.4733e-02, PNorm = 83.4644, GNorm = 0.9253, lr_0 = 1.8027e-04
Loss = 8.9036e-02, PNorm = 83.4664, GNorm = 0.6610, lr_0 = 1.8015e-04
Loss = 9.5973e-02, PNorm = 83.4718, GNorm = 0.4940, lr_0 = 1.8002e-04
Loss = 8.6932e-02, PNorm = 83.4781, GNorm = 0.7377, lr_0 = 1.7990e-04
Loss = 7.9765e-02, PNorm = 83.4813, GNorm = 0.6367, lr_0 = 1.7978e-04
Loss = 8.9110e-02, PNorm = 83.4848, GNorm = 0.8701, lr_0 = 1.7965e-04
Loss = 8.8484e-02, PNorm = 83.4882, GNorm = 0.7734, lr_0 = 1.7953e-04
Loss = 9.8377e-02, PNorm = 83.4885, GNorm = 0.9874, lr_0 = 1.7941e-04
Loss = 8.3428e-02, PNorm = 83.4913, GNorm = 0.5561, lr_0 = 1.7928e-04
Loss = 8.7006e-02, PNorm = 83.4941, GNorm = 0.6652, lr_0 = 1.7916e-04
Loss = 8.3318e-02, PNorm = 83.4973, GNorm = 0.5587, lr_0 = 1.7904e-04
Loss = 8.1494e-02, PNorm = 83.4993, GNorm = 0.6303, lr_0 = 1.7892e-04
Loss = 9.0523e-02, PNorm = 83.5002, GNorm = 0.8188, lr_0 = 1.7879e-04
Loss = 9.4787e-02, PNorm = 83.5010, GNorm = 0.7302, lr_0 = 1.7867e-04
Loss = 8.4952e-02, PNorm = 83.5033, GNorm = 0.8060, lr_0 = 1.7855e-04
Loss = 9.4762e-02, PNorm = 83.5082, GNorm = 0.5774, lr_0 = 1.7843e-04
Loss = 8.8269e-02, PNorm = 83.5123, GNorm = 1.1286, lr_0 = 1.7830e-04
Loss = 8.7642e-02, PNorm = 83.5158, GNorm = 0.6732, lr_0 = 1.7818e-04
Loss = 7.9902e-02, PNorm = 83.5164, GNorm = 0.7897, lr_0 = 1.7806e-04
Loss = 9.5994e-02, PNorm = 83.5185, GNorm = 0.7592, lr_0 = 1.7794e-04
Loss = 9.5477e-02, PNorm = 83.5212, GNorm = 0.7468, lr_0 = 1.7782e-04
Validation mae = 0.229333
Epoch 23
Loss = 8.0662e-02, PNorm = 83.5238, GNorm = 0.4980, lr_0 = 1.7769e-04
Loss = 7.9811e-02, PNorm = 83.5272, GNorm = 0.6749, lr_0 = 1.7757e-04
Loss = 8.1296e-02, PNorm = 83.5334, GNorm = 0.5994, lr_0 = 1.7745e-04
Loss = 7.9330e-02, PNorm = 83.5373, GNorm = 0.8003, lr_0 = 1.7733e-04
Loss = 8.9700e-02, PNorm = 83.5386, GNorm = 0.8137, lr_0 = 1.7721e-04
Loss = 8.0427e-02, PNorm = 83.5426, GNorm = 0.6093, lr_0 = 1.7709e-04
Loss = 8.8397e-02, PNorm = 83.5449, GNorm = 0.9515, lr_0 = 1.7696e-04
Loss = 8.6108e-02, PNorm = 83.5446, GNorm = 0.8037, lr_0 = 1.7684e-04
Loss = 8.3450e-02, PNorm = 83.5470, GNorm = 0.8178, lr_0 = 1.7672e-04
Loss = 9.3333e-02, PNorm = 83.5504, GNorm = 0.5432, lr_0 = 1.7660e-04
Loss = 8.7011e-02, PNorm = 83.5516, GNorm = 0.4920, lr_0 = 1.7648e-04
Loss = 9.8801e-02, PNorm = 83.5543, GNorm = 0.7538, lr_0 = 1.7636e-04
Loss = 8.1754e-02, PNorm = 83.5563, GNorm = 0.7154, lr_0 = 1.7624e-04
Loss = 8.4896e-02, PNorm = 83.5586, GNorm = 0.4801, lr_0 = 1.7612e-04
Loss = 8.4764e-02, PNorm = 83.5641, GNorm = 0.8714, lr_0 = 1.7600e-04
Loss = 8.0285e-02, PNorm = 83.5663, GNorm = 0.6437, lr_0 = 1.7588e-04
Loss = 7.7429e-02, PNorm = 83.5679, GNorm = 0.5369, lr_0 = 1.7576e-04
Loss = 9.1457e-02, PNorm = 83.5726, GNorm = 0.8971, lr_0 = 1.7564e-04
Loss = 8.3429e-02, PNorm = 83.5768, GNorm = 0.5314, lr_0 = 1.7552e-04
Loss = 7.6017e-02, PNorm = 83.5795, GNorm = 0.6340, lr_0 = 1.7540e-04
Loss = 7.5571e-02, PNorm = 83.5825, GNorm = 0.5891, lr_0 = 1.7528e-04
Loss = 8.8727e-02, PNorm = 83.5832, GNorm = 0.6230, lr_0 = 1.7516e-04
Loss = 8.3033e-02, PNorm = 83.5859, GNorm = 0.5882, lr_0 = 1.7504e-04
Loss = 8.5884e-02, PNorm = 83.5882, GNorm = 0.7155, lr_0 = 1.7492e-04
Loss = 8.2511e-02, PNorm = 83.5908, GNorm = 0.5048, lr_0 = 1.7480e-04
Loss = 9.0210e-02, PNorm = 83.5955, GNorm = 0.9314, lr_0 = 1.7468e-04
Loss = 8.4264e-02, PNorm = 83.5996, GNorm = 0.6963, lr_0 = 1.7456e-04
Loss = 7.8446e-02, PNorm = 83.6051, GNorm = 0.4080, lr_0 = 1.7444e-04
Loss = 7.8199e-02, PNorm = 83.6089, GNorm = 0.5073, lr_0 = 1.7432e-04
Loss = 8.6192e-02, PNorm = 83.6107, GNorm = 0.6060, lr_0 = 1.7420e-04
Loss = 8.4031e-02, PNorm = 83.6128, GNorm = 0.8301, lr_0 = 1.7408e-04
Loss = 9.4101e-02, PNorm = 83.6142, GNorm = 0.6515, lr_0 = 1.7396e-04
Loss = 8.7525e-02, PNorm = 83.6184, GNorm = 0.7116, lr_0 = 1.7384e-04
Loss = 8.1841e-02, PNorm = 83.6224, GNorm = 0.5873, lr_0 = 1.7372e-04
Loss = 7.1047e-02, PNorm = 83.6237, GNorm = 0.6314, lr_0 = 1.7360e-04
Loss = 7.8008e-02, PNorm = 83.6250, GNorm = 0.7250, lr_0 = 1.7348e-04
Loss = 7.9928e-02, PNorm = 83.6272, GNorm = 0.5699, lr_0 = 1.7336e-04
Loss = 9.2910e-02, PNorm = 83.6298, GNorm = 0.5520, lr_0 = 1.7325e-04
Loss = 9.9982e-02, PNorm = 83.6343, GNorm = 0.8069, lr_0 = 1.7313e-04
Loss = 8.0707e-02, PNorm = 83.6395, GNorm = 0.5856, lr_0 = 1.7301e-04
Loss = 7.8725e-02, PNorm = 83.6427, GNorm = 0.7964, lr_0 = 1.7289e-04
Loss = 8.9522e-02, PNorm = 83.6446, GNorm = 0.5253, lr_0 = 1.7277e-04
Loss = 7.7689e-02, PNorm = 83.6483, GNorm = 0.5681, lr_0 = 1.7265e-04
Loss = 9.2608e-02, PNorm = 83.6516, GNorm = 0.6133, lr_0 = 1.7253e-04
Loss = 7.6542e-02, PNorm = 83.6547, GNorm = 0.4684, lr_0 = 1.7242e-04
Loss = 8.3655e-02, PNorm = 83.6577, GNorm = 0.7601, lr_0 = 1.7230e-04
Loss = 9.6467e-02, PNorm = 83.6600, GNorm = 0.6017, lr_0 = 1.7218e-04
Loss = 8.8713e-02, PNorm = 83.6615, GNorm = 0.7431, lr_0 = 1.7206e-04
Loss = 8.0162e-02, PNorm = 83.6636, GNorm = 0.5582, lr_0 = 1.7194e-04
Loss = 7.6423e-02, PNorm = 83.6656, GNorm = 0.4842, lr_0 = 1.7183e-04
Loss = 7.5133e-02, PNorm = 83.6686, GNorm = 0.6286, lr_0 = 1.7171e-04
Loss = 8.4302e-02, PNorm = 83.6710, GNorm = 0.8940, lr_0 = 1.7159e-04
Loss = 9.4398e-02, PNorm = 83.6741, GNorm = 0.6065, lr_0 = 1.7147e-04
Loss = 9.0032e-02, PNorm = 83.6768, GNorm = 0.7854, lr_0 = 1.7136e-04
Loss = 9.4580e-02, PNorm = 83.6804, GNorm = 0.6085, lr_0 = 1.7124e-04
Loss = 7.9816e-02, PNorm = 83.6837, GNorm = 0.6585, lr_0 = 1.7112e-04
Loss = 8.5087e-02, PNorm = 83.6859, GNorm = 0.5947, lr_0 = 1.7100e-04
Loss = 8.4505e-02, PNorm = 83.6899, GNorm = 0.5395, lr_0 = 1.7089e-04
Loss = 8.3248e-02, PNorm = 83.6959, GNorm = 0.9169, lr_0 = 1.7077e-04
Loss = 8.3366e-02, PNorm = 83.6976, GNorm = 0.8032, lr_0 = 1.7065e-04
Loss = 8.4507e-02, PNorm = 83.6974, GNorm = 0.6089, lr_0 = 1.7054e-04
Loss = 8.2785e-02, PNorm = 83.6982, GNorm = 0.9330, lr_0 = 1.7042e-04
Loss = 8.3838e-02, PNorm = 83.7001, GNorm = 1.0757, lr_0 = 1.7030e-04
Loss = 7.6756e-02, PNorm = 83.7012, GNorm = 0.6699, lr_0 = 1.7019e-04
Loss = 8.9726e-02, PNorm = 83.7042, GNorm = 0.5931, lr_0 = 1.7007e-04
Loss = 9.1965e-02, PNorm = 83.7069, GNorm = 0.5790, lr_0 = 1.6995e-04
Loss = 7.9016e-02, PNorm = 83.7087, GNorm = 0.5318, lr_0 = 1.6984e-04
Loss = 8.3006e-02, PNorm = 83.7098, GNorm = 0.6560, lr_0 = 1.6972e-04
Loss = 8.4314e-02, PNorm = 83.7128, GNorm = 0.5591, lr_0 = 1.6960e-04
Loss = 8.1217e-02, PNorm = 83.7143, GNorm = 0.5119, lr_0 = 1.6949e-04
Loss = 9.1401e-02, PNorm = 83.7171, GNorm = 0.9132, lr_0 = 1.6937e-04
Loss = 7.4832e-02, PNorm = 83.7196, GNorm = 0.7411, lr_0 = 1.6926e-04
Loss = 9.2317e-02, PNorm = 83.7224, GNorm = 0.7572, lr_0 = 1.6914e-04
Loss = 8.9517e-02, PNorm = 83.7250, GNorm = 0.6313, lr_0 = 1.6902e-04
Loss = 8.3549e-02, PNorm = 83.7276, GNorm = 0.6065, lr_0 = 1.6891e-04
Loss = 8.3956e-02, PNorm = 83.7323, GNorm = 0.5118, lr_0 = 1.6879e-04
Loss = 9.0618e-02, PNorm = 83.7362, GNorm = 0.4480, lr_0 = 1.6868e-04
Loss = 7.8764e-02, PNorm = 83.7432, GNorm = 0.6677, lr_0 = 1.6856e-04
Loss = 9.4889e-02, PNorm = 83.7449, GNorm = 0.7642, lr_0 = 1.6845e-04
Loss = 7.9139e-02, PNorm = 83.7448, GNorm = 0.7635, lr_0 = 1.6833e-04
Loss = 7.8412e-02, PNorm = 83.7482, GNorm = 0.5735, lr_0 = 1.6821e-04
Loss = 9.9047e-02, PNorm = 83.7513, GNorm = 0.6716, lr_0 = 1.6810e-04
Loss = 8.3436e-02, PNorm = 83.7553, GNorm = 0.4268, lr_0 = 1.6798e-04
Loss = 7.0940e-02, PNorm = 83.7566, GNorm = 0.6904, lr_0 = 1.6787e-04
Loss = 9.8412e-02, PNorm = 83.7603, GNorm = 0.6426, lr_0 = 1.6775e-04
Loss = 8.0102e-02, PNorm = 83.7631, GNorm = 0.6760, lr_0 = 1.6764e-04
Loss = 7.5183e-02, PNorm = 83.7648, GNorm = 0.6068, lr_0 = 1.6752e-04
Loss = 9.0987e-02, PNorm = 83.7650, GNorm = 0.6574, lr_0 = 1.6741e-04
Loss = 7.3054e-02, PNorm = 83.7652, GNorm = 0.5494, lr_0 = 1.6729e-04
Loss = 9.1517e-02, PNorm = 83.7663, GNorm = 0.7014, lr_0 = 1.6718e-04
Loss = 8.7664e-02, PNorm = 83.7667, GNorm = 0.4874, lr_0 = 1.6707e-04
Loss = 7.7815e-02, PNorm = 83.7711, GNorm = 0.7595, lr_0 = 1.6695e-04
Loss = 7.7035e-02, PNorm = 83.7753, GNorm = 0.5873, lr_0 = 1.6684e-04
Loss = 8.0834e-02, PNorm = 83.7791, GNorm = 1.0828, lr_0 = 1.6672e-04
Loss = 9.2566e-02, PNorm = 83.7812, GNorm = 1.0410, lr_0 = 1.6661e-04
Loss = 8.6863e-02, PNorm = 83.7823, GNorm = 0.7370, lr_0 = 1.6649e-04
Loss = 8.5676e-02, PNorm = 83.7860, GNorm = 1.0606, lr_0 = 1.6638e-04
Loss = 8.6912e-02, PNorm = 83.7885, GNorm = 0.4441, lr_0 = 1.6627e-04
Loss = 8.1929e-02, PNorm = 83.7932, GNorm = 1.2528, lr_0 = 1.6615e-04
Loss = 7.0197e-02, PNorm = 83.7966, GNorm = 0.6114, lr_0 = 1.6604e-04
Loss = 7.8661e-02, PNorm = 83.7969, GNorm = 0.7594, lr_0 = 1.6592e-04
Loss = 9.0862e-02, PNorm = 83.8013, GNorm = 0.7676, lr_0 = 1.6581e-04
Loss = 9.4500e-02, PNorm = 83.8059, GNorm = 0.6234, lr_0 = 1.6570e-04
Loss = 8.7729e-02, PNorm = 83.8072, GNorm = 0.7786, lr_0 = 1.6558e-04
Loss = 8.3188e-02, PNorm = 83.8092, GNorm = 0.5666, lr_0 = 1.6547e-04
Loss = 7.9782e-02, PNorm = 83.8109, GNorm = 0.7138, lr_0 = 1.6536e-04
Loss = 8.4770e-02, PNorm = 83.8124, GNorm = 0.6601, lr_0 = 1.6524e-04
Loss = 9.6601e-02, PNorm = 83.8146, GNorm = 0.6556, lr_0 = 1.6513e-04
Loss = 8.7160e-02, PNorm = 83.8171, GNorm = 0.8687, lr_0 = 1.6502e-04
Loss = 1.0291e-01, PNorm = 83.8216, GNorm = 1.2355, lr_0 = 1.6490e-04
Loss = 1.1195e-01, PNorm = 83.8263, GNorm = 1.2182, lr_0 = 1.6479e-04
Loss = 8.8091e-02, PNorm = 83.8289, GNorm = 0.7151, lr_0 = 1.6468e-04
Loss = 7.4589e-02, PNorm = 83.8322, GNorm = 0.6021, lr_0 = 1.6457e-04
Loss = 8.4226e-02, PNorm = 83.8380, GNorm = 0.7821, lr_0 = 1.6445e-04
Loss = 8.5805e-02, PNorm = 83.8413, GNorm = 0.7309, lr_0 = 1.6434e-04
Loss = 9.3761e-02, PNorm = 83.8435, GNorm = 0.5785, lr_0 = 1.6423e-04
Loss = 8.9717e-02, PNorm = 83.8453, GNorm = 0.5771, lr_0 = 1.6412e-04
Loss = 9.3032e-02, PNorm = 83.8471, GNorm = 0.7873, lr_0 = 1.6400e-04
Loss = 8.3154e-02, PNorm = 83.8495, GNorm = 0.4875, lr_0 = 1.6389e-04
Loss = 9.2057e-02, PNorm = 83.8539, GNorm = 0.6806, lr_0 = 1.6378e-04
Validation mae = 0.226900
Epoch 24
Loss = 8.0747e-02, PNorm = 83.8579, GNorm = 0.9255, lr_0 = 1.6367e-04
Loss = 8.9829e-02, PNorm = 83.8608, GNorm = 0.6498, lr_0 = 1.6355e-04
Loss = 8.6562e-02, PNorm = 83.8649, GNorm = 0.6307, lr_0 = 1.6344e-04
Loss = 9.3326e-02, PNorm = 83.8710, GNorm = 0.6286, lr_0 = 1.6333e-04
Loss = 8.3173e-02, PNorm = 83.8762, GNorm = 0.8112, lr_0 = 1.6322e-04
Loss = 8.0608e-02, PNorm = 83.8796, GNorm = 0.6054, lr_0 = 1.6311e-04
Loss = 9.1190e-02, PNorm = 83.8810, GNorm = 0.6727, lr_0 = 1.6299e-04
Loss = 8.8908e-02, PNorm = 83.8837, GNorm = 0.6428, lr_0 = 1.6288e-04
Loss = 7.1086e-02, PNorm = 83.8837, GNorm = 0.6312, lr_0 = 1.6277e-04
Loss = 8.1738e-02, PNorm = 83.8868, GNorm = 0.7750, lr_0 = 1.6266e-04
Loss = 8.1309e-02, PNorm = 83.8904, GNorm = 0.7625, lr_0 = 1.6255e-04
Loss = 7.3101e-02, PNorm = 83.8909, GNorm = 0.5780, lr_0 = 1.6244e-04
Loss = 8.1573e-02, PNorm = 83.8899, GNorm = 0.6814, lr_0 = 1.6233e-04
Loss = 7.8605e-02, PNorm = 83.8941, GNorm = 1.2292, lr_0 = 1.6221e-04
Loss = 7.8856e-02, PNorm = 83.8975, GNorm = 0.6110, lr_0 = 1.6210e-04
Loss = 7.6749e-02, PNorm = 83.9003, GNorm = 0.6849, lr_0 = 1.6199e-04
Loss = 8.7624e-02, PNorm = 83.9026, GNorm = 0.6487, lr_0 = 1.6188e-04
Loss = 8.6715e-02, PNorm = 83.9067, GNorm = 0.6656, lr_0 = 1.6177e-04
Loss = 7.6401e-02, PNorm = 83.9103, GNorm = 0.6418, lr_0 = 1.6166e-04
Loss = 7.2763e-02, PNorm = 83.9122, GNorm = 0.7568, lr_0 = 1.6155e-04
Loss = 8.5564e-02, PNorm = 83.9168, GNorm = 0.6215, lr_0 = 1.6144e-04
Loss = 8.0751e-02, PNorm = 83.9217, GNorm = 0.5194, lr_0 = 1.6133e-04
Loss = 7.5522e-02, PNorm = 83.9253, GNorm = 0.6996, lr_0 = 1.6122e-04
Loss = 8.4516e-02, PNorm = 83.9287, GNorm = 0.6475, lr_0 = 1.6111e-04
Loss = 8.9507e-02, PNorm = 83.9310, GNorm = 0.8535, lr_0 = 1.6100e-04
Loss = 9.0536e-02, PNorm = 83.9345, GNorm = 0.6799, lr_0 = 1.6089e-04
Loss = 8.7865e-02, PNorm = 83.9346, GNorm = 0.5240, lr_0 = 1.6078e-04
Loss = 7.9909e-02, PNorm = 83.9367, GNorm = 0.5360, lr_0 = 1.6067e-04
Loss = 7.7039e-02, PNorm = 83.9392, GNorm = 0.7134, lr_0 = 1.6056e-04
Loss = 8.7735e-02, PNorm = 83.9412, GNorm = 0.8441, lr_0 = 1.6045e-04
Loss = 8.3847e-02, PNorm = 83.9419, GNorm = 0.7372, lr_0 = 1.6034e-04
Loss = 8.3054e-02, PNorm = 83.9445, GNorm = 0.5782, lr_0 = 1.6023e-04
Loss = 7.6794e-02, PNorm = 83.9468, GNorm = 0.8828, lr_0 = 1.6012e-04
Loss = 8.8469e-02, PNorm = 83.9497, GNorm = 0.5879, lr_0 = 1.6001e-04
Loss = 6.9849e-02, PNorm = 83.9535, GNorm = 0.7504, lr_0 = 1.5990e-04
Loss = 8.8423e-02, PNorm = 83.9554, GNorm = 0.5818, lr_0 = 1.5979e-04
Loss = 8.1755e-02, PNorm = 83.9588, GNorm = 0.5756, lr_0 = 1.5968e-04
Loss = 7.5134e-02, PNorm = 83.9615, GNorm = 0.4824, lr_0 = 1.5957e-04
Loss = 9.2835e-02, PNorm = 83.9635, GNorm = 0.8638, lr_0 = 1.5946e-04
Loss = 9.4493e-02, PNorm = 83.9660, GNorm = 0.8478, lr_0 = 1.5935e-04
Loss = 8.0998e-02, PNorm = 83.9666, GNorm = 0.6170, lr_0 = 1.5924e-04
Loss = 8.9991e-02, PNorm = 83.9691, GNorm = 0.6640, lr_0 = 1.5913e-04
Loss = 7.9226e-02, PNorm = 83.9719, GNorm = 0.8910, lr_0 = 1.5902e-04
Loss = 6.6266e-02, PNorm = 83.9722, GNorm = 0.4739, lr_0 = 1.5891e-04
Loss = 8.2561e-02, PNorm = 83.9733, GNorm = 0.8026, lr_0 = 1.5880e-04
Loss = 7.8233e-02, PNorm = 83.9770, GNorm = 0.6924, lr_0 = 1.5870e-04
Loss = 7.6913e-02, PNorm = 83.9818, GNorm = 0.6210, lr_0 = 1.5859e-04
Loss = 8.6008e-02, PNorm = 83.9843, GNorm = 0.5282, lr_0 = 1.5848e-04
Loss = 9.0368e-02, PNorm = 83.9867, GNorm = 0.5210, lr_0 = 1.5837e-04
Loss = 8.1124e-02, PNorm = 83.9897, GNorm = 0.7253, lr_0 = 1.5826e-04
Loss = 6.6553e-02, PNorm = 83.9907, GNorm = 0.4836, lr_0 = 1.5815e-04
Loss = 7.9393e-02, PNorm = 83.9931, GNorm = 0.7176, lr_0 = 1.5804e-04
Loss = 7.6828e-02, PNorm = 83.9961, GNorm = 0.8845, lr_0 = 1.5794e-04
Loss = 9.0639e-02, PNorm = 83.9997, GNorm = 0.5053, lr_0 = 1.5783e-04
Loss = 8.6442e-02, PNorm = 84.0012, GNorm = 0.8679, lr_0 = 1.5772e-04
Loss = 8.3047e-02, PNorm = 84.0045, GNorm = 0.6991, lr_0 = 1.5761e-04
Loss = 8.0395e-02, PNorm = 84.0076, GNorm = 0.6466, lr_0 = 1.5750e-04
Loss = 8.6469e-02, PNorm = 84.0106, GNorm = 0.8395, lr_0 = 1.5740e-04
Loss = 8.3312e-02, PNorm = 84.0113, GNorm = 0.6694, lr_0 = 1.5729e-04
Loss = 7.4121e-02, PNorm = 84.0119, GNorm = 0.5344, lr_0 = 1.5718e-04
Loss = 9.7410e-02, PNorm = 84.0143, GNorm = 0.7814, lr_0 = 1.5707e-04
Loss = 8.6912e-02, PNorm = 84.0160, GNorm = 0.8880, lr_0 = 1.5697e-04
Loss = 8.2313e-02, PNorm = 84.0179, GNorm = 0.8519, lr_0 = 1.5686e-04
Loss = 7.9108e-02, PNorm = 84.0197, GNorm = 0.5193, lr_0 = 1.5675e-04
Loss = 7.8215e-02, PNorm = 84.0223, GNorm = 0.6217, lr_0 = 1.5664e-04
Loss = 8.6186e-02, PNorm = 84.0236, GNorm = 0.9147, lr_0 = 1.5654e-04
Loss = 8.8193e-02, PNorm = 84.0268, GNorm = 0.8234, lr_0 = 1.5643e-04
Loss = 8.2698e-02, PNorm = 84.0295, GNorm = 0.6269, lr_0 = 1.5632e-04
Loss = 7.7563e-02, PNorm = 84.0320, GNorm = 0.5615, lr_0 = 1.5621e-04
Loss = 8.8388e-02, PNorm = 84.0342, GNorm = 0.6535, lr_0 = 1.5611e-04
Loss = 7.2673e-02, PNorm = 84.0362, GNorm = 0.5407, lr_0 = 1.5600e-04
Loss = 8.7036e-02, PNorm = 84.0384, GNorm = 0.5896, lr_0 = 1.5589e-04
Loss = 8.3543e-02, PNorm = 84.0411, GNorm = 0.6387, lr_0 = 1.5579e-04
Loss = 8.3628e-02, PNorm = 84.0447, GNorm = 0.5022, lr_0 = 1.5568e-04
Loss = 7.7892e-02, PNorm = 84.0475, GNorm = 0.6931, lr_0 = 1.5557e-04
Loss = 8.3891e-02, PNorm = 84.0483, GNorm = 0.9874, lr_0 = 1.5547e-04
Loss = 7.8585e-02, PNorm = 84.0515, GNorm = 0.7577, lr_0 = 1.5536e-04
Loss = 8.6424e-02, PNorm = 84.0563, GNorm = 0.6375, lr_0 = 1.5525e-04
Loss = 7.9201e-02, PNorm = 84.0596, GNorm = 0.5312, lr_0 = 1.5515e-04
Loss = 8.9425e-02, PNorm = 84.0606, GNorm = 0.7045, lr_0 = 1.5504e-04
Loss = 7.8312e-02, PNorm = 84.0629, GNorm = 0.5006, lr_0 = 1.5493e-04
Loss = 8.1999e-02, PNorm = 84.0652, GNorm = 0.7494, lr_0 = 1.5483e-04
Loss = 8.5873e-02, PNorm = 84.0682, GNorm = 0.7430, lr_0 = 1.5472e-04
Loss = 8.3499e-02, PNorm = 84.0692, GNorm = 0.5594, lr_0 = 1.5462e-04
Loss = 6.5604e-02, PNorm = 84.0727, GNorm = 0.7527, lr_0 = 1.5451e-04
Loss = 9.5586e-02, PNorm = 84.0761, GNorm = 1.0987, lr_0 = 1.5440e-04
Loss = 9.1288e-02, PNorm = 84.0804, GNorm = 0.7786, lr_0 = 1.5430e-04
Loss = 8.0786e-02, PNorm = 84.0829, GNorm = 0.8407, lr_0 = 1.5419e-04
Loss = 7.7216e-02, PNorm = 84.0828, GNorm = 0.7250, lr_0 = 1.5409e-04
Loss = 8.1811e-02, PNorm = 84.0843, GNorm = 0.7018, lr_0 = 1.5398e-04
Loss = 9.1644e-02, PNorm = 84.0871, GNorm = 0.5668, lr_0 = 1.5388e-04
Loss = 7.4816e-02, PNorm = 84.0904, GNorm = 0.5842, lr_0 = 1.5377e-04
Loss = 8.0313e-02, PNorm = 84.0914, GNorm = 0.5195, lr_0 = 1.5367e-04
Loss = 8.2987e-02, PNorm = 84.0939, GNorm = 0.8045, lr_0 = 1.5356e-04
Loss = 8.0330e-02, PNorm = 84.0965, GNorm = 0.6636, lr_0 = 1.5346e-04
Loss = 8.4657e-02, PNorm = 84.0995, GNorm = 0.8060, lr_0 = 1.5335e-04
Loss = 8.0877e-02, PNorm = 84.1025, GNorm = 0.6263, lr_0 = 1.5325e-04
Loss = 9.1896e-02, PNorm = 84.1063, GNorm = 0.7078, lr_0 = 1.5314e-04
Loss = 8.1885e-02, PNorm = 84.1101, GNorm = 0.5994, lr_0 = 1.5304e-04
Loss = 7.1344e-02, PNorm = 84.1116, GNorm = 0.6070, lr_0 = 1.5293e-04
Loss = 8.1056e-02, PNorm = 84.1129, GNorm = 0.5677, lr_0 = 1.5283e-04
Loss = 7.5600e-02, PNorm = 84.1121, GNorm = 0.6454, lr_0 = 1.5272e-04
Loss = 8.3240e-02, PNorm = 84.1133, GNorm = 0.8438, lr_0 = 1.5262e-04
Loss = 1.0517e-01, PNorm = 84.1155, GNorm = 0.6032, lr_0 = 1.5251e-04
Loss = 9.7961e-02, PNorm = 84.1175, GNorm = 0.8404, lr_0 = 1.5241e-04
Loss = 8.4042e-02, PNorm = 84.1199, GNorm = 0.7550, lr_0 = 1.5230e-04
Loss = 7.6326e-02, PNorm = 84.1250, GNorm = 0.6472, lr_0 = 1.5220e-04
Loss = 8.0210e-02, PNorm = 84.1289, GNorm = 0.5195, lr_0 = 1.5209e-04
Loss = 7.4836e-02, PNorm = 84.1306, GNorm = 0.5087, lr_0 = 1.5199e-04
Loss = 8.2147e-02, PNorm = 84.1320, GNorm = 0.8766, lr_0 = 1.5189e-04
Loss = 9.2331e-02, PNorm = 84.1348, GNorm = 0.9117, lr_0 = 1.5178e-04
Loss = 7.6038e-02, PNorm = 84.1365, GNorm = 0.7483, lr_0 = 1.5168e-04
Loss = 7.7021e-02, PNorm = 84.1382, GNorm = 0.6186, lr_0 = 1.5157e-04
Loss = 9.1429e-02, PNorm = 84.1419, GNorm = 0.6548, lr_0 = 1.5147e-04
Loss = 7.3129e-02, PNorm = 84.1451, GNorm = 0.5733, lr_0 = 1.5137e-04
Loss = 7.8263e-02, PNorm = 84.1477, GNorm = 0.6835, lr_0 = 1.5126e-04
Loss = 7.6439e-02, PNorm = 84.1487, GNorm = 0.6463, lr_0 = 1.5116e-04
Loss = 8.3494e-02, PNorm = 84.1484, GNorm = 0.7513, lr_0 = 1.5106e-04
Loss = 8.8286e-02, PNorm = 84.1505, GNorm = 0.6708, lr_0 = 1.5095e-04
Loss = 8.4603e-02, PNorm = 84.1522, GNorm = 0.6214, lr_0 = 1.5085e-04
Validation mae = 0.228226
Epoch 25
Loss = 8.2706e-02, PNorm = 84.1539, GNorm = 0.8539, lr_0 = 1.5075e-04
Loss = 7.8600e-02, PNorm = 84.1553, GNorm = 0.4816, lr_0 = 1.5064e-04
Loss = 7.9175e-02, PNorm = 84.1593, GNorm = 0.7980, lr_0 = 1.5054e-04
Loss = 7.4940e-02, PNorm = 84.1614, GNorm = 0.4308, lr_0 = 1.5044e-04
Loss = 7.8228e-02, PNorm = 84.1622, GNorm = 0.5725, lr_0 = 1.5033e-04
Loss = 7.2895e-02, PNorm = 84.1651, GNorm = 0.6025, lr_0 = 1.5023e-04
Loss = 7.7772e-02, PNorm = 84.1693, GNorm = 0.6668, lr_0 = 1.5013e-04
Loss = 7.7578e-02, PNorm = 84.1728, GNorm = 0.5518, lr_0 = 1.5002e-04
Loss = 7.4763e-02, PNorm = 84.1745, GNorm = 0.9862, lr_0 = 1.4992e-04
Loss = 7.9141e-02, PNorm = 84.1774, GNorm = 0.5113, lr_0 = 1.4982e-04
Loss = 8.2833e-02, PNorm = 84.1806, GNorm = 0.7367, lr_0 = 1.4972e-04
Loss = 8.1236e-02, PNorm = 84.1824, GNorm = 0.6271, lr_0 = 1.4961e-04
Loss = 7.4447e-02, PNorm = 84.1831, GNorm = 0.6806, lr_0 = 1.4951e-04
Loss = 7.2122e-02, PNorm = 84.1855, GNorm = 0.5878, lr_0 = 1.4941e-04
Loss = 8.0957e-02, PNorm = 84.1880, GNorm = 0.6394, lr_0 = 1.4931e-04
Loss = 7.1366e-02, PNorm = 84.1920, GNorm = 0.6913, lr_0 = 1.4920e-04
Loss = 8.6782e-02, PNorm = 84.1947, GNorm = 0.5737, lr_0 = 1.4910e-04
Loss = 8.6521e-02, PNorm = 84.1964, GNorm = 0.8568, lr_0 = 1.4900e-04
Loss = 7.8443e-02, PNorm = 84.2003, GNorm = 0.7024, lr_0 = 1.4890e-04
Loss = 7.8821e-02, PNorm = 84.2042, GNorm = 0.6765, lr_0 = 1.4880e-04
Loss = 8.7565e-02, PNorm = 84.2062, GNorm = 0.5680, lr_0 = 1.4869e-04
Loss = 7.7134e-02, PNorm = 84.2090, GNorm = 0.5582, lr_0 = 1.4859e-04
Loss = 8.0749e-02, PNorm = 84.2099, GNorm = 0.7147, lr_0 = 1.4849e-04
Loss = 7.5074e-02, PNorm = 84.2113, GNorm = 0.6935, lr_0 = 1.4839e-04
Loss = 7.9964e-02, PNorm = 84.2130, GNorm = 0.6030, lr_0 = 1.4829e-04
Loss = 7.7713e-02, PNorm = 84.2153, GNorm = 0.6149, lr_0 = 1.4818e-04
Loss = 8.1713e-02, PNorm = 84.2153, GNorm = 0.5883, lr_0 = 1.4808e-04
Loss = 7.6877e-02, PNorm = 84.2154, GNorm = 0.4272, lr_0 = 1.4798e-04
Loss = 7.8736e-02, PNorm = 84.2161, GNorm = 0.5819, lr_0 = 1.4788e-04
Loss = 8.2095e-02, PNorm = 84.2195, GNorm = 0.6514, lr_0 = 1.4778e-04
Loss = 6.7465e-02, PNorm = 84.2219, GNorm = 0.6337, lr_0 = 1.4768e-04
Loss = 7.2611e-02, PNorm = 84.2222, GNorm = 0.5614, lr_0 = 1.4758e-04
Loss = 8.6927e-02, PNorm = 84.2230, GNorm = 0.9077, lr_0 = 1.4748e-04
Loss = 6.6489e-02, PNorm = 84.2250, GNorm = 0.5718, lr_0 = 1.4737e-04
Loss = 8.4742e-02, PNorm = 84.2278, GNorm = 0.6009, lr_0 = 1.4727e-04
Loss = 7.8978e-02, PNorm = 84.2281, GNorm = 1.0176, lr_0 = 1.4717e-04
Loss = 7.7183e-02, PNorm = 84.2304, GNorm = 0.8042, lr_0 = 1.4707e-04
Loss = 8.5161e-02, PNorm = 84.2343, GNorm = 0.6361, lr_0 = 1.4697e-04
Loss = 8.0133e-02, PNorm = 84.2378, GNorm = 0.5929, lr_0 = 1.4687e-04
Loss = 8.5221e-02, PNorm = 84.2403, GNorm = 0.5367, lr_0 = 1.4677e-04
Loss = 7.8261e-02, PNorm = 84.2437, GNorm = 1.0450, lr_0 = 1.4667e-04
Loss = 7.6929e-02, PNorm = 84.2448, GNorm = 0.6059, lr_0 = 1.4657e-04
Loss = 8.2156e-02, PNorm = 84.2472, GNorm = 0.5284, lr_0 = 1.4647e-04
Loss = 8.7593e-02, PNorm = 84.2520, GNorm = 0.8345, lr_0 = 1.4637e-04
Loss = 8.2155e-02, PNorm = 84.2543, GNorm = 0.6458, lr_0 = 1.4627e-04
Loss = 7.4870e-02, PNorm = 84.2573, GNorm = 0.8176, lr_0 = 1.4617e-04
Loss = 8.6709e-02, PNorm = 84.2590, GNorm = 0.7684, lr_0 = 1.4607e-04
Loss = 8.7505e-02, PNorm = 84.2589, GNorm = 0.6616, lr_0 = 1.4597e-04
Loss = 8.1616e-02, PNorm = 84.2588, GNorm = 0.7247, lr_0 = 1.4587e-04
Loss = 8.1335e-02, PNorm = 84.2578, GNorm = 0.6456, lr_0 = 1.4577e-04
Loss = 6.9557e-02, PNorm = 84.2593, GNorm = 0.5273, lr_0 = 1.4567e-04
Loss = 7.6306e-02, PNorm = 84.2612, GNorm = 0.6088, lr_0 = 1.4557e-04
Loss = 8.6094e-02, PNorm = 84.2624, GNorm = 0.5266, lr_0 = 1.4547e-04
Loss = 8.4992e-02, PNorm = 84.2645, GNorm = 0.5414, lr_0 = 1.4537e-04
Loss = 8.1116e-02, PNorm = 84.2679, GNorm = 0.5000, lr_0 = 1.4527e-04
Loss = 7.6036e-02, PNorm = 84.2695, GNorm = 0.4366, lr_0 = 1.4517e-04
Loss = 8.8439e-02, PNorm = 84.2737, GNorm = 0.4660, lr_0 = 1.4507e-04
Loss = 7.8448e-02, PNorm = 84.2766, GNorm = 0.5331, lr_0 = 1.4497e-04
Loss = 8.2058e-02, PNorm = 84.2796, GNorm = 0.5210, lr_0 = 1.4487e-04
Loss = 8.0077e-02, PNorm = 84.2813, GNorm = 0.5435, lr_0 = 1.4477e-04
Loss = 7.7120e-02, PNorm = 84.2807, GNorm = 0.5671, lr_0 = 1.4467e-04
Loss = 7.6566e-02, PNorm = 84.2811, GNorm = 0.5710, lr_0 = 1.4457e-04
Loss = 9.1935e-02, PNorm = 84.2831, GNorm = 0.5484, lr_0 = 1.4447e-04
Loss = 7.6660e-02, PNorm = 84.2871, GNorm = 0.5945, lr_0 = 1.4438e-04
Loss = 8.0769e-02, PNorm = 84.2884, GNorm = 0.7228, lr_0 = 1.4428e-04
Loss = 8.1646e-02, PNorm = 84.2913, GNorm = 0.8031, lr_0 = 1.4418e-04
Loss = 6.7433e-02, PNorm = 84.2951, GNorm = 0.6160, lr_0 = 1.4408e-04
Loss = 8.4118e-02, PNorm = 84.2966, GNorm = 0.5867, lr_0 = 1.4398e-04
Loss = 7.7583e-02, PNorm = 84.2980, GNorm = 0.6499, lr_0 = 1.4388e-04
Loss = 7.7328e-02, PNorm = 84.2993, GNorm = 0.6886, lr_0 = 1.4378e-04
Loss = 8.5165e-02, PNorm = 84.3008, GNorm = 0.9671, lr_0 = 1.4368e-04
Loss = 8.0181e-02, PNorm = 84.3040, GNorm = 0.6373, lr_0 = 1.4359e-04
Loss = 8.9380e-02, PNorm = 84.3077, GNorm = 0.9027, lr_0 = 1.4349e-04
Loss = 7.1036e-02, PNorm = 84.3090, GNorm = 0.6827, lr_0 = 1.4339e-04
Loss = 8.3801e-02, PNorm = 84.3108, GNorm = 0.6875, lr_0 = 1.4329e-04
Loss = 8.3176e-02, PNorm = 84.3120, GNorm = 0.5736, lr_0 = 1.4319e-04
Loss = 9.2323e-02, PNorm = 84.3157, GNorm = 0.5338, lr_0 = 1.4310e-04
Loss = 8.4727e-02, PNorm = 84.3186, GNorm = 0.6632, lr_0 = 1.4300e-04
Loss = 8.8712e-02, PNorm = 84.3195, GNorm = 0.4997, lr_0 = 1.4290e-04
Loss = 8.6297e-02, PNorm = 84.3214, GNorm = 0.8998, lr_0 = 1.4280e-04
Loss = 7.6746e-02, PNorm = 84.3246, GNorm = 0.6325, lr_0 = 1.4270e-04
Loss = 8.1175e-02, PNorm = 84.3255, GNorm = 0.5985, lr_0 = 1.4261e-04
Loss = 7.9078e-02, PNorm = 84.3269, GNorm = 0.6208, lr_0 = 1.4251e-04
Loss = 7.9028e-02, PNorm = 84.3279, GNorm = 0.5480, lr_0 = 1.4241e-04
Loss = 9.6671e-02, PNorm = 84.3300, GNorm = 0.6487, lr_0 = 1.4231e-04
Loss = 8.5652e-02, PNorm = 84.3333, GNorm = 0.5498, lr_0 = 1.4222e-04
Loss = 8.0386e-02, PNorm = 84.3329, GNorm = 0.5640, lr_0 = 1.4212e-04
Loss = 8.0367e-02, PNorm = 84.3348, GNorm = 0.6969, lr_0 = 1.4202e-04
Loss = 8.4079e-02, PNorm = 84.3369, GNorm = 0.6304, lr_0 = 1.4192e-04
Loss = 8.9550e-02, PNorm = 84.3396, GNorm = 0.6657, lr_0 = 1.4183e-04
Loss = 8.0005e-02, PNorm = 84.3431, GNorm = 0.7401, lr_0 = 1.4173e-04
Loss = 9.6270e-02, PNorm = 84.3453, GNorm = 0.6132, lr_0 = 1.4163e-04
Loss = 7.4668e-02, PNorm = 84.3481, GNorm = 0.6950, lr_0 = 1.4153e-04
Loss = 8.3420e-02, PNorm = 84.3518, GNorm = 0.7862, lr_0 = 1.4144e-04
Loss = 8.5791e-02, PNorm = 84.3520, GNorm = 0.7043, lr_0 = 1.4134e-04
Loss = 7.6140e-02, PNorm = 84.3529, GNorm = 0.6685, lr_0 = 1.4124e-04
Loss = 7.6739e-02, PNorm = 84.3565, GNorm = 0.7242, lr_0 = 1.4115e-04
Loss = 8.5547e-02, PNorm = 84.3603, GNorm = 0.6576, lr_0 = 1.4105e-04
Loss = 7.7304e-02, PNorm = 84.3630, GNorm = 0.6014, lr_0 = 1.4095e-04
Loss = 8.0603e-02, PNorm = 84.3640, GNorm = 0.5498, lr_0 = 1.4086e-04
Loss = 8.7901e-02, PNorm = 84.3672, GNorm = 0.7027, lr_0 = 1.4076e-04
Loss = 8.0121e-02, PNorm = 84.3692, GNorm = 0.6357, lr_0 = 1.4066e-04
Loss = 8.5378e-02, PNorm = 84.3709, GNorm = 0.7471, lr_0 = 1.4057e-04
Loss = 8.9539e-02, PNorm = 84.3737, GNorm = 0.6399, lr_0 = 1.4047e-04
Loss = 9.6051e-02, PNorm = 84.3767, GNorm = 1.1701, lr_0 = 1.4038e-04
Loss = 9.3109e-02, PNorm = 84.3790, GNorm = 0.6685, lr_0 = 1.4028e-04
Loss = 9.1031e-02, PNorm = 84.3811, GNorm = 0.6744, lr_0 = 1.4018e-04
Loss = 9.2559e-02, PNorm = 84.3835, GNorm = 0.7167, lr_0 = 1.4009e-04
Loss = 6.9977e-02, PNorm = 84.3856, GNorm = 0.5081, lr_0 = 1.3999e-04
Loss = 8.7099e-02, PNorm = 84.3867, GNorm = 0.5927, lr_0 = 1.3990e-04
Loss = 9.5009e-02, PNorm = 84.3891, GNorm = 0.6448, lr_0 = 1.3980e-04
Loss = 7.7416e-02, PNorm = 84.3922, GNorm = 0.5734, lr_0 = 1.3970e-04
Loss = 8.5618e-02, PNorm = 84.3929, GNorm = 0.4838, lr_0 = 1.3961e-04
Loss = 7.9786e-02, PNorm = 84.3935, GNorm = 0.7577, lr_0 = 1.3951e-04
Loss = 8.0494e-02, PNorm = 84.3954, GNorm = 0.5108, lr_0 = 1.3942e-04
Loss = 8.3037e-02, PNorm = 84.3967, GNorm = 0.7506, lr_0 = 1.3932e-04
Loss = 8.1698e-02, PNorm = 84.4009, GNorm = 0.6893, lr_0 = 1.3923e-04
Loss = 8.7779e-02, PNorm = 84.4045, GNorm = 0.6374, lr_0 = 1.3913e-04
Loss = 7.6567e-02, PNorm = 84.4065, GNorm = 0.6531, lr_0 = 1.3904e-04
Loss = 8.1313e-02, PNorm = 84.4081, GNorm = 0.5331, lr_0 = 1.3894e-04
Validation mae = 0.227033
Epoch 26
Loss = 7.3162e-02, PNorm = 84.4108, GNorm = 0.5899, lr_0 = 1.3884e-04
Loss = 7.4541e-02, PNorm = 84.4111, GNorm = 0.6683, lr_0 = 1.3875e-04
Loss = 7.4586e-02, PNorm = 84.4112, GNorm = 0.6481, lr_0 = 1.3865e-04
Loss = 7.7944e-02, PNorm = 84.4132, GNorm = 0.8504, lr_0 = 1.3856e-04
Loss = 8.1505e-02, PNorm = 84.4167, GNorm = 0.6337, lr_0 = 1.3846e-04
Loss = 7.3753e-02, PNorm = 84.4178, GNorm = 0.5488, lr_0 = 1.3837e-04
Loss = 7.6659e-02, PNorm = 84.4190, GNorm = 0.6307, lr_0 = 1.3828e-04
Loss = 7.8943e-02, PNorm = 84.4206, GNorm = 0.5505, lr_0 = 1.3818e-04
Loss = 7.3359e-02, PNorm = 84.4218, GNorm = 0.6024, lr_0 = 1.3809e-04
Loss = 6.5672e-02, PNorm = 84.4248, GNorm = 0.6122, lr_0 = 1.3799e-04
Loss = 8.8787e-02, PNorm = 84.4280, GNorm = 0.7181, lr_0 = 1.3790e-04
Loss = 6.8861e-02, PNorm = 84.4324, GNorm = 0.5378, lr_0 = 1.3780e-04
Loss = 7.2022e-02, PNorm = 84.4339, GNorm = 0.4733, lr_0 = 1.3771e-04
Loss = 8.5564e-02, PNorm = 84.4342, GNorm = 0.6974, lr_0 = 1.3761e-04
Loss = 7.7529e-02, PNorm = 84.4378, GNorm = 0.8817, lr_0 = 1.3752e-04
Loss = 7.9748e-02, PNorm = 84.4405, GNorm = 0.5309, lr_0 = 1.3742e-04
Loss = 6.1005e-02, PNorm = 84.4429, GNorm = 0.5972, lr_0 = 1.3733e-04
Loss = 7.5200e-02, PNorm = 84.4440, GNorm = 0.6862, lr_0 = 1.3724e-04
Loss = 8.4308e-02, PNorm = 84.4453, GNorm = 0.5596, lr_0 = 1.3714e-04
Loss = 8.1874e-02, PNorm = 84.4491, GNorm = 0.7481, lr_0 = 1.3705e-04
Loss = 8.3650e-02, PNorm = 84.4522, GNorm = 0.5616, lr_0 = 1.3695e-04
Loss = 8.8139e-02, PNorm = 84.4549, GNorm = 0.7195, lr_0 = 1.3686e-04
Loss = 9.2266e-02, PNorm = 84.4564, GNorm = 0.7564, lr_0 = 1.3677e-04
Loss = 7.9301e-02, PNorm = 84.4580, GNorm = 0.6957, lr_0 = 1.3667e-04
Loss = 7.8053e-02, PNorm = 84.4585, GNorm = 0.6559, lr_0 = 1.3658e-04
Loss = 7.8389e-02, PNorm = 84.4609, GNorm = 0.6386, lr_0 = 1.3649e-04
Loss = 7.5195e-02, PNorm = 84.4654, GNorm = 0.8490, lr_0 = 1.3639e-04
Loss = 7.5858e-02, PNorm = 84.4682, GNorm = 0.6546, lr_0 = 1.3630e-04
Loss = 8.5497e-02, PNorm = 84.4703, GNorm = 0.8382, lr_0 = 1.3621e-04
Loss = 7.4102e-02, PNorm = 84.4717, GNorm = 0.7009, lr_0 = 1.3611e-04
Loss = 7.7619e-02, PNorm = 84.4712, GNorm = 0.6520, lr_0 = 1.3602e-04
Loss = 8.9543e-02, PNorm = 84.4733, GNorm = 0.5389, lr_0 = 1.3593e-04
Loss = 8.2454e-02, PNorm = 84.4766, GNorm = 0.6119, lr_0 = 1.3583e-04
Loss = 7.0969e-02, PNorm = 84.4786, GNorm = 0.5468, lr_0 = 1.3574e-04
Loss = 7.6741e-02, PNorm = 84.4802, GNorm = 0.6476, lr_0 = 1.3565e-04
Loss = 8.9636e-02, PNorm = 84.4808, GNorm = 0.6695, lr_0 = 1.3555e-04
Loss = 8.1930e-02, PNorm = 84.4824, GNorm = 0.5411, lr_0 = 1.3546e-04
Loss = 9.0389e-02, PNorm = 84.4847, GNorm = 0.8668, lr_0 = 1.3537e-04
Loss = 7.8377e-02, PNorm = 84.4877, GNorm = 0.7755, lr_0 = 1.3528e-04
Loss = 8.5622e-02, PNorm = 84.4894, GNorm = 0.5864, lr_0 = 1.3518e-04
Loss = 8.0326e-02, PNorm = 84.4893, GNorm = 0.9159, lr_0 = 1.3509e-04
Loss = 8.3626e-02, PNorm = 84.4906, GNorm = 1.1270, lr_0 = 1.3500e-04
Loss = 7.3511e-02, PNorm = 84.4936, GNorm = 0.5753, lr_0 = 1.3491e-04
Loss = 7.4010e-02, PNorm = 84.4966, GNorm = 0.6275, lr_0 = 1.3481e-04
Loss = 7.4203e-02, PNorm = 84.4990, GNorm = 0.8387, lr_0 = 1.3472e-04
Loss = 8.0727e-02, PNorm = 84.4998, GNorm = 0.7188, lr_0 = 1.3463e-04
Loss = 7.8375e-02, PNorm = 84.5015, GNorm = 0.7561, lr_0 = 1.3454e-04
Loss = 7.8076e-02, PNorm = 84.5041, GNorm = 0.5466, lr_0 = 1.3444e-04
Loss = 7.7208e-02, PNorm = 84.5045, GNorm = 0.5985, lr_0 = 1.3435e-04
Loss = 7.6467e-02, PNorm = 84.5070, GNorm = 0.6773, lr_0 = 1.3426e-04
Loss = 8.2385e-02, PNorm = 84.5081, GNorm = 0.5203, lr_0 = 1.3417e-04
Loss = 9.4070e-02, PNorm = 84.5075, GNorm = 0.6354, lr_0 = 1.3408e-04
Loss = 7.4888e-02, PNorm = 84.5091, GNorm = 0.5107, lr_0 = 1.3398e-04
Loss = 7.8861e-02, PNorm = 84.5105, GNorm = 0.7460, lr_0 = 1.3389e-04
Loss = 8.1727e-02, PNorm = 84.5119, GNorm = 0.8184, lr_0 = 1.3380e-04
Loss = 6.8806e-02, PNorm = 84.5129, GNorm = 0.7870, lr_0 = 1.3371e-04
Loss = 7.4024e-02, PNorm = 84.5142, GNorm = 0.6741, lr_0 = 1.3362e-04
Loss = 7.4011e-02, PNorm = 84.5163, GNorm = 0.4564, lr_0 = 1.3353e-04
Loss = 7.4817e-02, PNorm = 84.5188, GNorm = 0.6295, lr_0 = 1.3343e-04
Loss = 7.9446e-02, PNorm = 84.5222, GNorm = 0.7142, lr_0 = 1.3334e-04
Loss = 8.1616e-02, PNorm = 84.5241, GNorm = 0.5906, lr_0 = 1.3325e-04
Loss = 7.5698e-02, PNorm = 84.5268, GNorm = 0.5258, lr_0 = 1.3316e-04
Loss = 7.9211e-02, PNorm = 84.5275, GNorm = 0.6128, lr_0 = 1.3307e-04
Loss = 8.2720e-02, PNorm = 84.5299, GNorm = 0.6185, lr_0 = 1.3298e-04
Loss = 7.9214e-02, PNorm = 84.5305, GNorm = 0.7671, lr_0 = 1.3289e-04
Loss = 7.9334e-02, PNorm = 84.5318, GNorm = 0.7676, lr_0 = 1.3280e-04
Loss = 7.8174e-02, PNorm = 84.5335, GNorm = 0.9802, lr_0 = 1.3270e-04
Loss = 7.8358e-02, PNorm = 84.5355, GNorm = 0.5370, lr_0 = 1.3261e-04
Loss = 7.6906e-02, PNorm = 84.5371, GNorm = 0.5181, lr_0 = 1.3252e-04
Loss = 8.3585e-02, PNorm = 84.5396, GNorm = 0.7026, lr_0 = 1.3243e-04
Loss = 7.2325e-02, PNorm = 84.5423, GNorm = 0.6160, lr_0 = 1.3234e-04
Loss = 7.7392e-02, PNorm = 84.5456, GNorm = 0.8385, lr_0 = 1.3225e-04
Loss = 8.3971e-02, PNorm = 84.5471, GNorm = 0.5836, lr_0 = 1.3216e-04
Loss = 7.9423e-02, PNorm = 84.5489, GNorm = 0.4688, lr_0 = 1.3207e-04
Loss = 8.7533e-02, PNorm = 84.5520, GNorm = 0.9006, lr_0 = 1.3198e-04
Loss = 8.5788e-02, PNorm = 84.5525, GNorm = 0.7283, lr_0 = 1.3189e-04
Loss = 8.0062e-02, PNorm = 84.5564, GNorm = 0.6896, lr_0 = 1.3180e-04
Loss = 8.5755e-02, PNorm = 84.5595, GNorm = 0.6311, lr_0 = 1.3171e-04
Loss = 8.3917e-02, PNorm = 84.5621, GNorm = 0.7873, lr_0 = 1.3162e-04
Loss = 7.4777e-02, PNorm = 84.5642, GNorm = 0.7718, lr_0 = 1.3153e-04
Loss = 7.7099e-02, PNorm = 84.5672, GNorm = 0.6308, lr_0 = 1.3144e-04
Loss = 9.3823e-02, PNorm = 84.5687, GNorm = 0.6623, lr_0 = 1.3135e-04
Loss = 8.3262e-02, PNorm = 84.5718, GNorm = 0.5924, lr_0 = 1.3126e-04
Loss = 7.1253e-02, PNorm = 84.5760, GNorm = 0.5194, lr_0 = 1.3117e-04
Loss = 7.7701e-02, PNorm = 84.5775, GNorm = 0.5698, lr_0 = 1.3108e-04
Loss = 8.3961e-02, PNorm = 84.5791, GNorm = 0.6491, lr_0 = 1.3099e-04
Loss = 9.0006e-02, PNorm = 84.5804, GNorm = 0.7014, lr_0 = 1.3090e-04
Loss = 8.3137e-02, PNorm = 84.5807, GNorm = 0.5636, lr_0 = 1.3081e-04
Loss = 7.8814e-02, PNorm = 84.5810, GNorm = 0.7692, lr_0 = 1.3072e-04
Loss = 7.3764e-02, PNorm = 84.5819, GNorm = 0.5736, lr_0 = 1.3063e-04
Loss = 7.7540e-02, PNorm = 84.5830, GNorm = 0.6074, lr_0 = 1.3054e-04
Loss = 8.2325e-02, PNorm = 84.5851, GNorm = 0.6233, lr_0 = 1.3045e-04
Loss = 8.0396e-02, PNorm = 84.5875, GNorm = 0.6380, lr_0 = 1.3036e-04
Loss = 7.6432e-02, PNorm = 84.5900, GNorm = 0.4900, lr_0 = 1.3027e-04
Loss = 9.2131e-02, PNorm = 84.5923, GNorm = 0.6316, lr_0 = 1.3018e-04
Loss = 7.0639e-02, PNorm = 84.5938, GNorm = 0.5540, lr_0 = 1.3009e-04
Loss = 8.2709e-02, PNorm = 84.5957, GNorm = 0.7126, lr_0 = 1.3000e-04
Loss = 7.8543e-02, PNorm = 84.5984, GNorm = 0.6333, lr_0 = 1.2992e-04
Loss = 8.0568e-02, PNorm = 84.6007, GNorm = 0.6209, lr_0 = 1.2983e-04
Loss = 7.5571e-02, PNorm = 84.6032, GNorm = 0.6605, lr_0 = 1.2974e-04
Loss = 9.1274e-02, PNorm = 84.6051, GNorm = 0.4728, lr_0 = 1.2965e-04
Loss = 7.6364e-02, PNorm = 84.6058, GNorm = 0.6768, lr_0 = 1.2956e-04
Loss = 9.0462e-02, PNorm = 84.6061, GNorm = 0.6098, lr_0 = 1.2947e-04
Loss = 7.8750e-02, PNorm = 84.6078, GNorm = 0.5914, lr_0 = 1.2938e-04
Loss = 7.8806e-02, PNorm = 84.6113, GNorm = 0.6787, lr_0 = 1.2929e-04
Loss = 9.4228e-02, PNorm = 84.6138, GNorm = 0.8541, lr_0 = 1.2921e-04
Loss = 6.8240e-02, PNorm = 84.6157, GNorm = 0.7828, lr_0 = 1.2912e-04
Loss = 7.3930e-02, PNorm = 84.6179, GNorm = 0.7817, lr_0 = 1.2903e-04
Loss = 8.1759e-02, PNorm = 84.6197, GNorm = 0.8912, lr_0 = 1.2894e-04
Loss = 8.9379e-02, PNorm = 84.6212, GNorm = 0.7436, lr_0 = 1.2885e-04
Loss = 8.6805e-02, PNorm = 84.6221, GNorm = 0.6621, lr_0 = 1.2876e-04
Loss = 8.6072e-02, PNorm = 84.6258, GNorm = 0.8873, lr_0 = 1.2867e-04
Loss = 7.2831e-02, PNorm = 84.6262, GNorm = 0.8836, lr_0 = 1.2859e-04
Loss = 9.5963e-02, PNorm = 84.6242, GNorm = 0.6771, lr_0 = 1.2850e-04
Loss = 9.3801e-02, PNorm = 84.6263, GNorm = 0.6167, lr_0 = 1.2841e-04
Loss = 8.2297e-02, PNorm = 84.6285, GNorm = 0.7919, lr_0 = 1.2832e-04
Loss = 7.2209e-02, PNorm = 84.6303, GNorm = 0.7204, lr_0 = 1.2823e-04
Loss = 8.1311e-02, PNorm = 84.6317, GNorm = 0.5323, lr_0 = 1.2815e-04
Loss = 8.6327e-02, PNorm = 84.6328, GNorm = 0.6124, lr_0 = 1.2806e-04
Loss = 9.2185e-02, PNorm = 84.6351, GNorm = 0.4919, lr_0 = 1.2797e-04
Validation mae = 0.226182
Epoch 27
Loss = 8.0434e-02, PNorm = 84.6378, GNorm = 0.9030, lr_0 = 1.2788e-04
Loss = 7.4039e-02, PNorm = 84.6378, GNorm = 0.6025, lr_0 = 1.2780e-04
Loss = 8.1165e-02, PNorm = 84.6395, GNorm = 0.4907, lr_0 = 1.2771e-04
Loss = 7.4557e-02, PNorm = 84.6423, GNorm = 0.6461, lr_0 = 1.2762e-04
Loss = 7.5895e-02, PNorm = 84.6453, GNorm = 0.7323, lr_0 = 1.2753e-04
Loss = 7.9511e-02, PNorm = 84.6475, GNorm = 0.7351, lr_0 = 1.2745e-04
Loss = 7.5129e-02, PNorm = 84.6494, GNorm = 0.4758, lr_0 = 1.2736e-04
Loss = 9.0502e-02, PNorm = 84.6510, GNorm = 0.7411, lr_0 = 1.2727e-04
Loss = 7.3678e-02, PNorm = 84.6530, GNorm = 0.6605, lr_0 = 1.2718e-04
Loss = 7.9021e-02, PNorm = 84.6551, GNorm = 0.7302, lr_0 = 1.2710e-04
Loss = 7.0293e-02, PNorm = 84.6563, GNorm = 0.5669, lr_0 = 1.2701e-04
Loss = 7.8296e-02, PNorm = 84.6579, GNorm = 0.7194, lr_0 = 1.2692e-04
Loss = 6.9314e-02, PNorm = 84.6593, GNorm = 0.4558, lr_0 = 1.2684e-04
Loss = 7.2705e-02, PNorm = 84.6609, GNorm = 0.5210, lr_0 = 1.2675e-04
Loss = 8.0744e-02, PNorm = 84.6640, GNorm = 0.5433, lr_0 = 1.2666e-04
Loss = 8.1707e-02, PNorm = 84.6663, GNorm = 0.4286, lr_0 = 1.2658e-04
Loss = 8.1093e-02, PNorm = 84.6670, GNorm = 0.6197, lr_0 = 1.2649e-04
Loss = 8.0014e-02, PNorm = 84.6680, GNorm = 1.0494, lr_0 = 1.2640e-04
Loss = 6.9636e-02, PNorm = 84.6694, GNorm = 0.5997, lr_0 = 1.2632e-04
Loss = 7.9171e-02, PNorm = 84.6714, GNorm = 0.6570, lr_0 = 1.2623e-04
Loss = 8.3942e-02, PNorm = 84.6726, GNorm = 0.6105, lr_0 = 1.2614e-04
Loss = 7.9580e-02, PNorm = 84.6752, GNorm = 0.9620, lr_0 = 1.2606e-04
Loss = 6.8074e-02, PNorm = 84.6779, GNorm = 0.8003, lr_0 = 1.2597e-04
Loss = 7.4198e-02, PNorm = 84.6787, GNorm = 0.4941, lr_0 = 1.2588e-04
Loss = 7.6275e-02, PNorm = 84.6796, GNorm = 0.5950, lr_0 = 1.2580e-04
Loss = 7.4072e-02, PNorm = 84.6805, GNorm = 0.7247, lr_0 = 1.2571e-04
Loss = 7.5450e-02, PNorm = 84.6823, GNorm = 0.6160, lr_0 = 1.2563e-04
Loss = 7.9985e-02, PNorm = 84.6860, GNorm = 0.6708, lr_0 = 1.2554e-04
Loss = 7.3795e-02, PNorm = 84.6897, GNorm = 0.8347, lr_0 = 1.2545e-04
Loss = 9.2067e-02, PNorm = 84.6924, GNorm = 0.9328, lr_0 = 1.2537e-04
Loss = 8.5386e-02, PNorm = 84.6947, GNorm = 0.4863, lr_0 = 1.2528e-04
Loss = 7.3072e-02, PNorm = 84.6987, GNorm = 0.7608, lr_0 = 1.2520e-04
Loss = 8.9722e-02, PNorm = 84.7009, GNorm = 0.8781, lr_0 = 1.2511e-04
Loss = 7.2070e-02, PNorm = 84.7023, GNorm = 0.6569, lr_0 = 1.2502e-04
Loss = 7.8838e-02, PNorm = 84.7051, GNorm = 0.4842, lr_0 = 1.2494e-04
Loss = 7.5628e-02, PNorm = 84.7070, GNorm = 0.7375, lr_0 = 1.2485e-04
Loss = 8.3660e-02, PNorm = 84.7092, GNorm = 0.7583, lr_0 = 1.2477e-04
Loss = 7.5179e-02, PNorm = 84.7125, GNorm = 0.8321, lr_0 = 1.2468e-04
Loss = 7.2348e-02, PNorm = 84.7135, GNorm = 0.7327, lr_0 = 1.2460e-04
Loss = 8.6975e-02, PNorm = 84.7137, GNorm = 0.5928, lr_0 = 1.2451e-04
Loss = 6.8344e-02, PNorm = 84.7164, GNorm = 0.5714, lr_0 = 1.2443e-04
Loss = 7.8786e-02, PNorm = 84.7173, GNorm = 0.5617, lr_0 = 1.2434e-04
Loss = 7.5247e-02, PNorm = 84.7169, GNorm = 0.5805, lr_0 = 1.2426e-04
Loss = 7.2551e-02, PNorm = 84.7185, GNorm = 0.4307, lr_0 = 1.2417e-04
Loss = 8.3645e-02, PNorm = 84.7217, GNorm = 1.1834, lr_0 = 1.2409e-04
Loss = 8.3084e-02, PNorm = 84.7243, GNorm = 0.5898, lr_0 = 1.2400e-04
Loss = 6.9789e-02, PNorm = 84.7256, GNorm = 0.7601, lr_0 = 1.2392e-04
Loss = 7.4418e-02, PNorm = 84.7282, GNorm = 0.6349, lr_0 = 1.2383e-04
Loss = 7.4558e-02, PNorm = 84.7313, GNorm = 0.6579, lr_0 = 1.2375e-04
Loss = 8.1006e-02, PNorm = 84.7313, GNorm = 0.8691, lr_0 = 1.2366e-04
Loss = 7.2037e-02, PNorm = 84.7333, GNorm = 0.5182, lr_0 = 1.2358e-04
Loss = 9.0896e-02, PNorm = 84.7356, GNorm = 0.6039, lr_0 = 1.2349e-04
Loss = 7.2887e-02, PNorm = 84.7381, GNorm = 0.6019, lr_0 = 1.2341e-04
Loss = 8.1035e-02, PNorm = 84.7393, GNorm = 0.7515, lr_0 = 1.2332e-04
Loss = 9.0614e-02, PNorm = 84.7413, GNorm = 0.8486, lr_0 = 1.2324e-04
Loss = 7.6250e-02, PNorm = 84.7431, GNorm = 0.4303, lr_0 = 1.2315e-04
Loss = 7.9586e-02, PNorm = 84.7457, GNorm = 0.6648, lr_0 = 1.2307e-04
Loss = 7.2343e-02, PNorm = 84.7481, GNorm = 0.5395, lr_0 = 1.2298e-04
Loss = 7.5169e-02, PNorm = 84.7486, GNorm = 0.6253, lr_0 = 1.2290e-04
Loss = 8.5942e-02, PNorm = 84.7495, GNorm = 0.8569, lr_0 = 1.2282e-04
Loss = 8.2426e-02, PNorm = 84.7510, GNorm = 0.6000, lr_0 = 1.2273e-04
Loss = 7.0172e-02, PNorm = 84.7518, GNorm = 0.6240, lr_0 = 1.2265e-04
Loss = 7.8653e-02, PNorm = 84.7541, GNorm = 0.7417, lr_0 = 1.2256e-04
Loss = 7.7474e-02, PNorm = 84.7572, GNorm = 0.6748, lr_0 = 1.2248e-04
Loss = 7.3391e-02, PNorm = 84.7591, GNorm = 0.6209, lr_0 = 1.2240e-04
Loss = 7.4042e-02, PNorm = 84.7601, GNorm = 0.5010, lr_0 = 1.2231e-04
Loss = 7.9217e-02, PNorm = 84.7608, GNorm = 0.6305, lr_0 = 1.2223e-04
Loss = 7.1767e-02, PNorm = 84.7622, GNorm = 0.5238, lr_0 = 1.2214e-04
Loss = 8.1196e-02, PNorm = 84.7643, GNorm = 0.7360, lr_0 = 1.2206e-04
Loss = 7.5044e-02, PNorm = 84.7664, GNorm = 0.7607, lr_0 = 1.2198e-04
Loss = 9.0120e-02, PNorm = 84.7677, GNorm = 0.6359, lr_0 = 1.2189e-04
Loss = 7.5859e-02, PNorm = 84.7699, GNorm = 0.7505, lr_0 = 1.2181e-04
Loss = 8.2852e-02, PNorm = 84.7708, GNorm = 0.6067, lr_0 = 1.2173e-04
Loss = 7.9879e-02, PNorm = 84.7734, GNorm = 0.5343, lr_0 = 1.2164e-04
Loss = 7.5141e-02, PNorm = 84.7762, GNorm = 0.5079, lr_0 = 1.2156e-04
Loss = 7.6433e-02, PNorm = 84.7776, GNorm = 0.8410, lr_0 = 1.2148e-04
Loss = 8.4383e-02, PNorm = 84.7786, GNorm = 0.5256, lr_0 = 1.2139e-04
Loss = 7.8243e-02, PNorm = 84.7800, GNorm = 0.6021, lr_0 = 1.2131e-04
Loss = 9.6999e-02, PNorm = 84.7818, GNorm = 0.5462, lr_0 = 1.2123e-04
Loss = 7.2263e-02, PNorm = 84.7852, GNorm = 0.5657, lr_0 = 1.2114e-04
Loss = 6.2880e-02, PNorm = 84.7888, GNorm = 0.7130, lr_0 = 1.2106e-04
Loss = 7.6189e-02, PNorm = 84.7902, GNorm = 1.0256, lr_0 = 1.2098e-04
Loss = 6.7299e-02, PNorm = 84.7904, GNorm = 0.6621, lr_0 = 1.2090e-04
Loss = 7.9569e-02, PNorm = 84.7922, GNorm = 0.6713, lr_0 = 1.2081e-04
Loss = 7.7305e-02, PNorm = 84.7944, GNorm = 0.5958, lr_0 = 1.2073e-04
Loss = 8.9679e-02, PNorm = 84.7974, GNorm = 0.7509, lr_0 = 1.2065e-04
Loss = 7.5345e-02, PNorm = 84.7988, GNorm = 0.5401, lr_0 = 1.2056e-04
Loss = 7.6195e-02, PNorm = 84.8017, GNorm = 0.7417, lr_0 = 1.2048e-04
Loss = 8.7483e-02, PNorm = 84.8046, GNorm = 0.6154, lr_0 = 1.2040e-04
Loss = 7.3968e-02, PNorm = 84.8079, GNorm = 0.5907, lr_0 = 1.2032e-04
Loss = 7.3823e-02, PNorm = 84.8097, GNorm = 0.6224, lr_0 = 1.2023e-04
Loss = 7.5890e-02, PNorm = 84.8108, GNorm = 0.8786, lr_0 = 1.2015e-04
Loss = 8.4534e-02, PNorm = 84.8124, GNorm = 0.7601, lr_0 = 1.2007e-04
Loss = 8.7904e-02, PNorm = 84.8143, GNorm = 0.5077, lr_0 = 1.1999e-04
Loss = 8.1355e-02, PNorm = 84.8173, GNorm = 0.8844, lr_0 = 1.1991e-04
Loss = 7.8619e-02, PNorm = 84.8185, GNorm = 0.7275, lr_0 = 1.1982e-04
Loss = 6.8560e-02, PNorm = 84.8197, GNorm = 0.6078, lr_0 = 1.1974e-04
Loss = 8.7038e-02, PNorm = 84.8210, GNorm = 0.8231, lr_0 = 1.1966e-04
Loss = 7.1635e-02, PNorm = 84.8231, GNorm = 0.5672, lr_0 = 1.1958e-04
Loss = 7.9506e-02, PNorm = 84.8243, GNorm = 0.8696, lr_0 = 1.1950e-04
Loss = 7.8631e-02, PNorm = 84.8248, GNorm = 0.6613, lr_0 = 1.1941e-04
Loss = 7.9769e-02, PNorm = 84.8261, GNorm = 0.6571, lr_0 = 1.1933e-04
Loss = 7.6932e-02, PNorm = 84.8281, GNorm = 0.6244, lr_0 = 1.1925e-04
Loss = 7.7336e-02, PNorm = 84.8293, GNorm = 0.5604, lr_0 = 1.1917e-04
Loss = 7.6699e-02, PNorm = 84.8304, GNorm = 0.7536, lr_0 = 1.1909e-04
Loss = 7.4926e-02, PNorm = 84.8311, GNorm = 0.6314, lr_0 = 1.1901e-04
Loss = 8.8875e-02, PNorm = 84.8339, GNorm = 0.6634, lr_0 = 1.1892e-04
Loss = 8.8719e-02, PNorm = 84.8358, GNorm = 0.8534, lr_0 = 1.1884e-04
Loss = 7.0700e-02, PNorm = 84.8365, GNorm = 0.8437, lr_0 = 1.1876e-04
Loss = 7.7784e-02, PNorm = 84.8366, GNorm = 0.6079, lr_0 = 1.1868e-04
Loss = 8.3881e-02, PNorm = 84.8372, GNorm = 0.6293, lr_0 = 1.1860e-04
Loss = 7.8257e-02, PNorm = 84.8394, GNorm = 0.6249, lr_0 = 1.1852e-04
Loss = 6.9783e-02, PNorm = 84.8407, GNorm = 0.5278, lr_0 = 1.1844e-04
Loss = 8.4015e-02, PNorm = 84.8431, GNorm = 0.7333, lr_0 = 1.1835e-04
Loss = 8.0696e-02, PNorm = 84.8472, GNorm = 0.8318, lr_0 = 1.1827e-04
Loss = 7.7608e-02, PNorm = 84.8499, GNorm = 0.8612, lr_0 = 1.1819e-04
Loss = 7.5990e-02, PNorm = 84.8510, GNorm = 0.5903, lr_0 = 1.1811e-04
Loss = 8.0974e-02, PNorm = 84.8526, GNorm = 0.7531, lr_0 = 1.1803e-04
Loss = 7.4529e-02, PNorm = 84.8534, GNorm = 0.5670, lr_0 = 1.1795e-04
Loss = 8.3642e-02, PNorm = 84.8550, GNorm = 0.6939, lr_0 = 1.1787e-04
Validation mae = 0.227503
Epoch 28
Loss = 7.4541e-02, PNorm = 84.8572, GNorm = 0.7924, lr_0 = 1.1779e-04
Loss = 8.5543e-02, PNorm = 84.8588, GNorm = 0.6031, lr_0 = 1.1771e-04
Loss = 7.9158e-02, PNorm = 84.8596, GNorm = 0.7969, lr_0 = 1.1763e-04
Loss = 7.8671e-02, PNorm = 84.8618, GNorm = 0.6699, lr_0 = 1.1755e-04
Loss = 7.3072e-02, PNorm = 84.8641, GNorm = 0.6467, lr_0 = 1.1747e-04
Loss = 7.5339e-02, PNorm = 84.8652, GNorm = 0.6350, lr_0 = 1.1739e-04
Loss = 6.5205e-02, PNorm = 84.8658, GNorm = 0.5241, lr_0 = 1.1730e-04
Loss = 7.2256e-02, PNorm = 84.8680, GNorm = 0.8877, lr_0 = 1.1722e-04
Loss = 7.1156e-02, PNorm = 84.8711, GNorm = 0.6237, lr_0 = 1.1714e-04
Loss = 8.2063e-02, PNorm = 84.8742, GNorm = 0.6839, lr_0 = 1.1706e-04
Loss = 6.7474e-02, PNorm = 84.8767, GNorm = 0.5659, lr_0 = 1.1698e-04
Loss = 6.9081e-02, PNorm = 84.8786, GNorm = 0.6894, lr_0 = 1.1690e-04
Loss = 7.8187e-02, PNorm = 84.8804, GNorm = 0.4929, lr_0 = 1.1682e-04
Loss = 7.8090e-02, PNorm = 84.8826, GNorm = 0.7066, lr_0 = 1.1674e-04
Loss = 7.4144e-02, PNorm = 84.8841, GNorm = 0.7736, lr_0 = 1.1666e-04
Loss = 7.1851e-02, PNorm = 84.8848, GNorm = 0.6418, lr_0 = 1.1658e-04
Loss = 7.1645e-02, PNorm = 84.8861, GNorm = 0.6119, lr_0 = 1.1650e-04
Loss = 9.0447e-02, PNorm = 84.8879, GNorm = 0.5729, lr_0 = 1.1642e-04
Loss = 7.7780e-02, PNorm = 84.8911, GNorm = 0.6479, lr_0 = 1.1634e-04
Loss = 7.3512e-02, PNorm = 84.8924, GNorm = 0.6817, lr_0 = 1.1626e-04
Loss = 6.4087e-02, PNorm = 84.8930, GNorm = 0.3933, lr_0 = 1.1618e-04
Loss = 6.9128e-02, PNorm = 84.8952, GNorm = 0.7446, lr_0 = 1.1611e-04
Loss = 7.5373e-02, PNorm = 84.8961, GNorm = 0.4938, lr_0 = 1.1603e-04
Loss = 7.7294e-02, PNorm = 84.8969, GNorm = 0.8727, lr_0 = 1.1595e-04
Loss = 7.1154e-02, PNorm = 84.8974, GNorm = 0.5907, lr_0 = 1.1587e-04
Loss = 7.9060e-02, PNorm = 84.8998, GNorm = 0.7766, lr_0 = 1.1579e-04
Loss = 6.8870e-02, PNorm = 84.9030, GNorm = 0.5234, lr_0 = 1.1571e-04
Loss = 8.1761e-02, PNorm = 84.9046, GNorm = 0.6087, lr_0 = 1.1563e-04
Loss = 9.9262e-02, PNorm = 84.9069, GNorm = 0.5008, lr_0 = 1.1555e-04
Loss = 7.6609e-02, PNorm = 84.9083, GNorm = 0.6090, lr_0 = 1.1547e-04
Loss = 8.0195e-02, PNorm = 84.9110, GNorm = 0.6688, lr_0 = 1.1539e-04
Loss = 7.8456e-02, PNorm = 84.9124, GNorm = 0.6319, lr_0 = 1.1531e-04
Loss = 8.9834e-02, PNorm = 84.9143, GNorm = 0.5388, lr_0 = 1.1523e-04
Loss = 7.3243e-02, PNorm = 84.9158, GNorm = 0.6698, lr_0 = 1.1515e-04
Loss = 6.6918e-02, PNorm = 84.9158, GNorm = 0.6343, lr_0 = 1.1508e-04
Loss = 7.8155e-02, PNorm = 84.9171, GNorm = 0.8404, lr_0 = 1.1500e-04
Loss = 7.6540e-02, PNorm = 84.9195, GNorm = 0.6068, lr_0 = 1.1492e-04
Loss = 8.8638e-02, PNorm = 84.9225, GNorm = 0.9770, lr_0 = 1.1484e-04
Loss = 8.3793e-02, PNorm = 84.9247, GNorm = 0.6505, lr_0 = 1.1476e-04
Loss = 7.9407e-02, PNorm = 84.9257, GNorm = 0.7044, lr_0 = 1.1468e-04
Loss = 7.9142e-02, PNorm = 84.9272, GNorm = 0.7665, lr_0 = 1.1460e-04
Loss = 7.5892e-02, PNorm = 84.9294, GNorm = 0.7135, lr_0 = 1.1452e-04
Loss = 7.1656e-02, PNorm = 84.9302, GNorm = 0.5518, lr_0 = 1.1445e-04
Loss = 7.9946e-02, PNorm = 84.9311, GNorm = 0.7172, lr_0 = 1.1437e-04
Loss = 7.3244e-02, PNorm = 84.9320, GNorm = 0.6036, lr_0 = 1.1429e-04
Loss = 7.9318e-02, PNorm = 84.9339, GNorm = 0.6040, lr_0 = 1.1421e-04
Loss = 7.1782e-02, PNorm = 84.9343, GNorm = 0.5546, lr_0 = 1.1413e-04
Loss = 7.9417e-02, PNorm = 84.9344, GNorm = 0.7523, lr_0 = 1.1405e-04
Loss = 8.0674e-02, PNorm = 84.9383, GNorm = 0.6830, lr_0 = 1.1398e-04
Loss = 7.2799e-02, PNorm = 84.9411, GNorm = 0.5248, lr_0 = 1.1390e-04
Loss = 7.5293e-02, PNorm = 84.9428, GNorm = 0.6745, lr_0 = 1.1382e-04
Loss = 7.4359e-02, PNorm = 84.9436, GNorm = 0.8153, lr_0 = 1.1374e-04
Loss = 7.6433e-02, PNorm = 84.9443, GNorm = 0.7194, lr_0 = 1.1366e-04
Loss = 7.8118e-02, PNorm = 84.9479, GNorm = 0.7441, lr_0 = 1.1359e-04
Loss = 6.9343e-02, PNorm = 84.9473, GNorm = 0.6867, lr_0 = 1.1351e-04
Loss = 7.1462e-02, PNorm = 84.9478, GNorm = 0.6281, lr_0 = 1.1343e-04
Loss = 8.6579e-02, PNorm = 84.9488, GNorm = 0.4849, lr_0 = 1.1335e-04
Loss = 7.2135e-02, PNorm = 84.9508, GNorm = 0.4823, lr_0 = 1.1328e-04
Loss = 7.3552e-02, PNorm = 84.9520, GNorm = 0.6081, lr_0 = 1.1320e-04
Loss = 7.2952e-02, PNorm = 84.9541, GNorm = 0.6592, lr_0 = 1.1312e-04
Loss = 7.1408e-02, PNorm = 84.9563, GNorm = 0.5778, lr_0 = 1.1304e-04
Loss = 7.7662e-02, PNorm = 84.9590, GNorm = 0.6457, lr_0 = 1.1297e-04
Loss = 7.3402e-02, PNorm = 84.9610, GNorm = 0.7157, lr_0 = 1.1289e-04
Loss = 8.6441e-02, PNorm = 84.9634, GNorm = 0.9421, lr_0 = 1.1281e-04
Loss = 8.3554e-02, PNorm = 84.9644, GNorm = 0.8162, lr_0 = 1.1273e-04
Loss = 7.5285e-02, PNorm = 84.9658, GNorm = 0.6831, lr_0 = 1.1266e-04
Loss = 7.4307e-02, PNorm = 84.9660, GNorm = 0.6620, lr_0 = 1.1258e-04
Loss = 7.6643e-02, PNorm = 84.9669, GNorm = 0.7749, lr_0 = 1.1250e-04
Loss = 7.4932e-02, PNorm = 84.9676, GNorm = 0.5672, lr_0 = 1.1243e-04
Loss = 6.7678e-02, PNorm = 84.9676, GNorm = 0.6986, lr_0 = 1.1235e-04
Loss = 7.6654e-02, PNorm = 84.9684, GNorm = 0.6348, lr_0 = 1.1227e-04
Loss = 7.3889e-02, PNorm = 84.9692, GNorm = 0.6273, lr_0 = 1.1219e-04
Loss = 6.7609e-02, PNorm = 84.9700, GNorm = 0.5232, lr_0 = 1.1212e-04
Loss = 7.4469e-02, PNorm = 84.9715, GNorm = 0.8444, lr_0 = 1.1204e-04
Loss = 8.8446e-02, PNorm = 84.9716, GNorm = 0.6086, lr_0 = 1.1196e-04
Loss = 7.3416e-02, PNorm = 84.9735, GNorm = 0.6167, lr_0 = 1.1189e-04
Loss = 8.0235e-02, PNorm = 84.9763, GNorm = 0.7243, lr_0 = 1.1181e-04
Loss = 7.7423e-02, PNorm = 84.9770, GNorm = 1.1212, lr_0 = 1.1173e-04
Loss = 7.6448e-02, PNorm = 84.9786, GNorm = 0.5391, lr_0 = 1.1166e-04
Loss = 7.0283e-02, PNorm = 84.9806, GNorm = 0.5887, lr_0 = 1.1158e-04
Loss = 7.1285e-02, PNorm = 84.9817, GNorm = 0.7204, lr_0 = 1.1150e-04
Loss = 7.8090e-02, PNorm = 84.9828, GNorm = 0.7966, lr_0 = 1.1143e-04
Loss = 6.9248e-02, PNorm = 84.9846, GNorm = 0.5358, lr_0 = 1.1135e-04
Loss = 7.8345e-02, PNorm = 84.9868, GNorm = 0.6427, lr_0 = 1.1128e-04
Loss = 6.9022e-02, PNorm = 84.9883, GNorm = 0.6566, lr_0 = 1.1120e-04
Loss = 8.4367e-02, PNorm = 84.9907, GNorm = 0.6217, lr_0 = 1.1112e-04
Loss = 7.0300e-02, PNorm = 84.9924, GNorm = 0.6309, lr_0 = 1.1105e-04
Loss = 7.6647e-02, PNorm = 84.9948, GNorm = 0.5048, lr_0 = 1.1097e-04
Loss = 7.8169e-02, PNorm = 84.9962, GNorm = 0.6220, lr_0 = 1.1089e-04
Loss = 6.9571e-02, PNorm = 84.9981, GNorm = 0.6169, lr_0 = 1.1082e-04
Loss = 6.9326e-02, PNorm = 85.0009, GNorm = 0.7268, lr_0 = 1.1074e-04
Loss = 8.2321e-02, PNorm = 85.0027, GNorm = 0.6012, lr_0 = 1.1067e-04
Loss = 7.6323e-02, PNorm = 85.0051, GNorm = 0.6107, lr_0 = 1.1059e-04
Loss = 7.4537e-02, PNorm = 85.0064, GNorm = 0.5717, lr_0 = 1.1052e-04
Loss = 8.6688e-02, PNorm = 85.0088, GNorm = 0.8534, lr_0 = 1.1044e-04
Loss = 7.2024e-02, PNorm = 85.0104, GNorm = 0.6126, lr_0 = 1.1036e-04
Loss = 7.4998e-02, PNorm = 85.0110, GNorm = 0.8365, lr_0 = 1.1029e-04
Loss = 7.3608e-02, PNorm = 85.0126, GNorm = 0.5122, lr_0 = 1.1021e-04
Loss = 8.0192e-02, PNorm = 85.0147, GNorm = 0.6971, lr_0 = 1.1014e-04
Loss = 7.1287e-02, PNorm = 85.0148, GNorm = 0.5602, lr_0 = 1.1006e-04
Loss = 8.2309e-02, PNorm = 85.0160, GNorm = 0.5260, lr_0 = 1.0999e-04
Loss = 8.0608e-02, PNorm = 85.0190, GNorm = 0.5914, lr_0 = 1.0991e-04
Loss = 7.8044e-02, PNorm = 85.0211, GNorm = 0.6032, lr_0 = 1.0984e-04
Loss = 7.0883e-02, PNorm = 85.0223, GNorm = 0.5928, lr_0 = 1.0976e-04
Loss = 7.1308e-02, PNorm = 85.0247, GNorm = 0.5452, lr_0 = 1.0969e-04
Loss = 7.1768e-02, PNorm = 85.0270, GNorm = 0.5426, lr_0 = 1.0961e-04
Loss = 7.3280e-02, PNorm = 85.0284, GNorm = 0.5766, lr_0 = 1.0954e-04
Loss = 7.4410e-02, PNorm = 85.0276, GNorm = 0.6514, lr_0 = 1.0946e-04
Loss = 8.0155e-02, PNorm = 85.0275, GNorm = 0.4801, lr_0 = 1.0939e-04
Loss = 6.7835e-02, PNorm = 85.0285, GNorm = 0.5166, lr_0 = 1.0931e-04
Loss = 1.0736e-01, PNorm = 85.0325, GNorm = 0.5897, lr_0 = 1.0924e-04
Loss = 7.1479e-02, PNorm = 85.0344, GNorm = 0.5938, lr_0 = 1.0916e-04
Loss = 9.8035e-02, PNorm = 85.0345, GNorm = 0.8836, lr_0 = 1.0909e-04
Loss = 7.7212e-02, PNorm = 85.0355, GNorm = 0.4667, lr_0 = 1.0901e-04
Loss = 7.2858e-02, PNorm = 85.0382, GNorm = 0.6949, lr_0 = 1.0894e-04
Loss = 7.7683e-02, PNorm = 85.0401, GNorm = 0.6477, lr_0 = 1.0886e-04
Loss = 7.6861e-02, PNorm = 85.0406, GNorm = 0.6379, lr_0 = 1.0879e-04
Loss = 8.1949e-02, PNorm = 85.0408, GNorm = 0.7304, lr_0 = 1.0871e-04
Loss = 6.8347e-02, PNorm = 85.0418, GNorm = 0.5454, lr_0 = 1.0864e-04
Loss = 8.3737e-02, PNorm = 85.0437, GNorm = 0.7446, lr_0 = 1.0856e-04
Validation mae = 0.228639
Epoch 29
Loss = 6.6370e-02, PNorm = 85.0446, GNorm = 0.6029, lr_0 = 1.0849e-04
Loss = 7.3280e-02, PNorm = 85.0464, GNorm = 0.7535, lr_0 = 1.0841e-04
Loss = 8.3694e-02, PNorm = 85.0482, GNorm = 0.5327, lr_0 = 1.0834e-04
Loss = 7.5839e-02, PNorm = 85.0504, GNorm = 0.5224, lr_0 = 1.0827e-04
Loss = 7.0222e-02, PNorm = 85.0528, GNorm = 0.5935, lr_0 = 1.0819e-04
Loss = 7.3738e-02, PNorm = 85.0544, GNorm = 0.6525, lr_0 = 1.0812e-04
Loss = 6.7229e-02, PNorm = 85.0572, GNorm = 0.7494, lr_0 = 1.0804e-04
Loss = 7.1321e-02, PNorm = 85.0600, GNorm = 0.9078, lr_0 = 1.0797e-04
Loss = 6.5380e-02, PNorm = 85.0608, GNorm = 0.6897, lr_0 = 1.0790e-04
Loss = 6.5151e-02, PNorm = 85.0621, GNorm = 0.6037, lr_0 = 1.0782e-04
Loss = 7.1024e-02, PNorm = 85.0630, GNorm = 0.6737, lr_0 = 1.0775e-04
Loss = 7.6188e-02, PNorm = 85.0640, GNorm = 0.7985, lr_0 = 1.0767e-04
Loss = 8.1869e-02, PNorm = 85.0638, GNorm = 0.7865, lr_0 = 1.0760e-04
Loss = 7.6118e-02, PNorm = 85.0664, GNorm = 0.6272, lr_0 = 1.0753e-04
Loss = 6.5624e-02, PNorm = 85.0692, GNorm = 0.6188, lr_0 = 1.0745e-04
Loss = 6.7081e-02, PNorm = 85.0709, GNorm = 0.6341, lr_0 = 1.0738e-04
Loss = 7.2408e-02, PNorm = 85.0704, GNorm = 0.6493, lr_0 = 1.0731e-04
Loss = 7.5585e-02, PNorm = 85.0716, GNorm = 0.7346, lr_0 = 1.0723e-04
Loss = 7.0969e-02, PNorm = 85.0726, GNorm = 0.8675, lr_0 = 1.0716e-04
Loss = 6.1897e-02, PNorm = 85.0733, GNorm = 0.5254, lr_0 = 1.0709e-04
Loss = 8.5793e-02, PNorm = 85.0764, GNorm = 0.5178, lr_0 = 1.0701e-04
Loss = 8.1156e-02, PNorm = 85.0795, GNorm = 1.1007, lr_0 = 1.0694e-04
Loss = 7.1859e-02, PNorm = 85.0803, GNorm = 0.8071, lr_0 = 1.0687e-04
Loss = 7.8714e-02, PNorm = 85.0804, GNorm = 0.7011, lr_0 = 1.0679e-04
Loss = 6.6781e-02, PNorm = 85.0818, GNorm = 0.5794, lr_0 = 1.0672e-04
Loss = 6.4408e-02, PNorm = 85.0844, GNorm = 0.5840, lr_0 = 1.0665e-04
Loss = 7.5397e-02, PNorm = 85.0861, GNorm = 0.5684, lr_0 = 1.0657e-04
Loss = 8.1812e-02, PNorm = 85.0869, GNorm = 0.7188, lr_0 = 1.0650e-04
Loss = 8.0306e-02, PNorm = 85.0878, GNorm = 0.5963, lr_0 = 1.0643e-04
Loss = 6.6506e-02, PNorm = 85.0897, GNorm = 0.6151, lr_0 = 1.0635e-04
Loss = 7.7487e-02, PNorm = 85.0920, GNorm = 0.7916, lr_0 = 1.0628e-04
Loss = 7.5199e-02, PNorm = 85.0937, GNorm = 0.4997, lr_0 = 1.0621e-04
Loss = 7.1278e-02, PNorm = 85.0956, GNorm = 0.6144, lr_0 = 1.0614e-04
Loss = 8.1043e-02, PNorm = 85.0971, GNorm = 0.6974, lr_0 = 1.0606e-04
Loss = 7.9549e-02, PNorm = 85.0976, GNorm = 0.6531, lr_0 = 1.0599e-04
Loss = 6.9150e-02, PNorm = 85.0961, GNorm = 0.4644, lr_0 = 1.0592e-04
Loss = 7.5284e-02, PNorm = 85.0957, GNorm = 0.5713, lr_0 = 1.0585e-04
Loss = 8.0229e-02, PNorm = 85.0959, GNorm = 0.5571, lr_0 = 1.0577e-04
Loss = 7.7063e-02, PNorm = 85.0977, GNorm = 0.4922, lr_0 = 1.0570e-04
Loss = 6.6147e-02, PNorm = 85.1007, GNorm = 0.4970, lr_0 = 1.0563e-04
Loss = 7.9516e-02, PNorm = 85.1012, GNorm = 0.4490, lr_0 = 1.0556e-04
Loss = 7.0902e-02, PNorm = 85.1021, GNorm = 0.5511, lr_0 = 1.0548e-04
Loss = 6.9396e-02, PNorm = 85.1051, GNorm = 0.6296, lr_0 = 1.0541e-04
Loss = 6.7853e-02, PNorm = 85.1075, GNorm = 0.4363, lr_0 = 1.0534e-04
Loss = 7.1557e-02, PNorm = 85.1076, GNorm = 0.6800, lr_0 = 1.0527e-04
Loss = 7.6120e-02, PNorm = 85.1088, GNorm = 0.7233, lr_0 = 1.0519e-04
Loss = 6.9687e-02, PNorm = 85.1120, GNorm = 0.7372, lr_0 = 1.0512e-04
Loss = 8.2216e-02, PNorm = 85.1150, GNorm = 0.6357, lr_0 = 1.0505e-04
Loss = 7.7543e-02, PNorm = 85.1167, GNorm = 0.6931, lr_0 = 1.0498e-04
Loss = 7.6034e-02, PNorm = 85.1181, GNorm = 0.5559, lr_0 = 1.0491e-04
Loss = 6.7279e-02, PNorm = 85.1198, GNorm = 0.6340, lr_0 = 1.0483e-04
Loss = 8.0262e-02, PNorm = 85.1209, GNorm = 0.6850, lr_0 = 1.0476e-04
Loss = 7.3573e-02, PNorm = 85.1221, GNorm = 0.5185, lr_0 = 1.0469e-04
Loss = 7.3662e-02, PNorm = 85.1239, GNorm = 0.5927, lr_0 = 1.0462e-04
Loss = 7.4731e-02, PNorm = 85.1249, GNorm = 0.5834, lr_0 = 1.0455e-04
Loss = 7.1015e-02, PNorm = 85.1261, GNorm = 0.6663, lr_0 = 1.0448e-04
Loss = 9.1349e-02, PNorm = 85.1286, GNorm = 0.7337, lr_0 = 1.0440e-04
Loss = 8.2036e-02, PNorm = 85.1287, GNorm = 0.7171, lr_0 = 1.0433e-04
Loss = 7.2422e-02, PNorm = 85.1293, GNorm = 0.7198, lr_0 = 1.0426e-04
Loss = 6.7730e-02, PNorm = 85.1315, GNorm = 0.5916, lr_0 = 1.0419e-04
Loss = 7.4965e-02, PNorm = 85.1329, GNorm = 0.5620, lr_0 = 1.0412e-04
Loss = 8.0226e-02, PNorm = 85.1343, GNorm = 0.5894, lr_0 = 1.0405e-04
Loss = 9.1091e-02, PNorm = 85.1360, GNorm = 0.7692, lr_0 = 1.0398e-04
Loss = 7.2310e-02, PNorm = 85.1369, GNorm = 0.5501, lr_0 = 1.0391e-04
Loss = 7.5375e-02, PNorm = 85.1387, GNorm = 0.5054, lr_0 = 1.0383e-04
Loss = 7.9544e-02, PNorm = 85.1394, GNorm = 0.6389, lr_0 = 1.0376e-04
Loss = 7.5767e-02, PNorm = 85.1408, GNorm = 0.6533, lr_0 = 1.0369e-04
Loss = 6.3599e-02, PNorm = 85.1415, GNorm = 0.7366, lr_0 = 1.0362e-04
Loss = 7.1328e-02, PNorm = 85.1415, GNorm = 0.7098, lr_0 = 1.0355e-04
Loss = 7.6102e-02, PNorm = 85.1430, GNorm = 0.6312, lr_0 = 1.0348e-04
Loss = 6.8634e-02, PNorm = 85.1454, GNorm = 0.6729, lr_0 = 1.0341e-04
Loss = 7.9217e-02, PNorm = 85.1479, GNorm = 0.5047, lr_0 = 1.0334e-04
Loss = 7.3966e-02, PNorm = 85.1500, GNorm = 0.8201, lr_0 = 1.0327e-04
Loss = 7.4620e-02, PNorm = 85.1510, GNorm = 0.7023, lr_0 = 1.0320e-04
Loss = 8.4056e-02, PNorm = 85.1528, GNorm = 0.7275, lr_0 = 1.0312e-04
Loss = 7.4273e-02, PNorm = 85.1554, GNorm = 0.5545, lr_0 = 1.0305e-04
Loss = 6.9048e-02, PNorm = 85.1568, GNorm = 0.6154, lr_0 = 1.0298e-04
Loss = 8.2416e-02, PNorm = 85.1588, GNorm = 0.9476, lr_0 = 1.0291e-04
Loss = 8.1340e-02, PNorm = 85.1607, GNorm = 0.7444, lr_0 = 1.0284e-04
Loss = 7.7788e-02, PNorm = 85.1614, GNorm = 0.6700, lr_0 = 1.0277e-04
Loss = 7.7584e-02, PNorm = 85.1622, GNorm = 0.5204, lr_0 = 1.0270e-04
Loss = 7.5133e-02, PNorm = 85.1639, GNorm = 0.5567, lr_0 = 1.0263e-04
Loss = 7.3373e-02, PNorm = 85.1671, GNorm = 0.5495, lr_0 = 1.0256e-04
Loss = 8.3339e-02, PNorm = 85.1698, GNorm = 0.8524, lr_0 = 1.0249e-04
Loss = 7.8490e-02, PNorm = 85.1690, GNorm = 0.6227, lr_0 = 1.0242e-04
Loss = 7.2300e-02, PNorm = 85.1697, GNorm = 0.6251, lr_0 = 1.0235e-04
Loss = 7.7185e-02, PNorm = 85.1707, GNorm = 0.6318, lr_0 = 1.0228e-04
Loss = 7.4761e-02, PNorm = 85.1711, GNorm = 0.5396, lr_0 = 1.0221e-04
Loss = 8.1255e-02, PNorm = 85.1715, GNorm = 0.7698, lr_0 = 1.0214e-04
Loss = 7.6486e-02, PNorm = 85.1726, GNorm = 0.8072, lr_0 = 1.0207e-04
Loss = 7.1631e-02, PNorm = 85.1758, GNorm = 0.6655, lr_0 = 1.0200e-04
Loss = 8.3240e-02, PNorm = 85.1779, GNorm = 0.7872, lr_0 = 1.0193e-04
Loss = 7.0050e-02, PNorm = 85.1788, GNorm = 0.5818, lr_0 = 1.0186e-04
Loss = 8.4173e-02, PNorm = 85.1792, GNorm = 0.6157, lr_0 = 1.0179e-04
Loss = 7.4706e-02, PNorm = 85.1817, GNorm = 0.6838, lr_0 = 1.0172e-04
Loss = 7.2423e-02, PNorm = 85.1829, GNorm = 0.9951, lr_0 = 1.0165e-04
Loss = 7.8110e-02, PNorm = 85.1846, GNorm = 0.6749, lr_0 = 1.0158e-04
Loss = 7.3016e-02, PNorm = 85.1860, GNorm = 0.7631, lr_0 = 1.0151e-04
Loss = 7.5699e-02, PNorm = 85.1874, GNorm = 0.6005, lr_0 = 1.0144e-04
Loss = 7.5046e-02, PNorm = 85.1885, GNorm = 0.5346, lr_0 = 1.0137e-04
Loss = 7.3526e-02, PNorm = 85.1896, GNorm = 0.5853, lr_0 = 1.0130e-04
Loss = 7.0076e-02, PNorm = 85.1915, GNorm = 0.6922, lr_0 = 1.0123e-04
Loss = 7.5365e-02, PNorm = 85.1931, GNorm = 0.9821, lr_0 = 1.0116e-04
Loss = 7.9419e-02, PNorm = 85.1944, GNorm = 0.6373, lr_0 = 1.0110e-04
Loss = 7.4288e-02, PNorm = 85.1941, GNorm = 0.6459, lr_0 = 1.0103e-04
Loss = 6.7244e-02, PNorm = 85.1943, GNorm = 0.6559, lr_0 = 1.0096e-04
Loss = 7.9073e-02, PNorm = 85.1951, GNorm = 0.6388, lr_0 = 1.0089e-04
Loss = 7.3470e-02, PNorm = 85.1960, GNorm = 0.7003, lr_0 = 1.0082e-04
Loss = 8.3415e-02, PNorm = 85.1980, GNorm = 0.4917, lr_0 = 1.0075e-04
Loss = 7.4738e-02, PNorm = 85.2005, GNorm = 0.8730, lr_0 = 1.0068e-04
Loss = 8.7918e-02, PNorm = 85.2020, GNorm = 0.5182, lr_0 = 1.0061e-04
Loss = 7.2148e-02, PNorm = 85.2027, GNorm = 0.8658, lr_0 = 1.0054e-04
Loss = 8.0667e-02, PNorm = 85.2032, GNorm = 0.6740, lr_0 = 1.0047e-04
Loss = 7.9627e-02, PNorm = 85.2043, GNorm = 0.5972, lr_0 = 1.0041e-04
Loss = 8.7024e-02, PNorm = 85.2061, GNorm = 0.7353, lr_0 = 1.0034e-04
Loss = 7.4745e-02, PNorm = 85.2079, GNorm = 0.9110, lr_0 = 1.0027e-04
Loss = 7.9601e-02, PNorm = 85.2080, GNorm = 0.8552, lr_0 = 1.0020e-04
Loss = 7.3033e-02, PNorm = 85.2085, GNorm = 0.5763, lr_0 = 1.0013e-04
Loss = 8.9582e-02, PNorm = 85.2093, GNorm = 0.6234, lr_0 = 1.0006e-04
Loss = 8.0815e-02, PNorm = 85.2107, GNorm = 0.5760, lr_0 = 1.0000e-04
Validation mae = 0.226140
Model 0 best validation mae = 0.226140 on epoch 29
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.225236
Ensemble test mae = 0.225236
Fold 6
Splitting data with seed 6
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 9.4279e-01, PNorm = 47.8613, GNorm = 4.2126, lr_0 = 1.0413e-04
Loss = 9.0394e-01, PNorm = 47.8626, GNorm = 1.7750, lr_0 = 1.0788e-04
Loss = 9.5165e-01, PNorm = 47.8636, GNorm = 3.5746, lr_0 = 1.1163e-04
Loss = 9.5383e-01, PNorm = 47.8653, GNorm = 3.1638, lr_0 = 1.1537e-04
Loss = 9.6886e-01, PNorm = 47.8679, GNorm = 4.5625, lr_0 = 1.1913e-04
Loss = 8.4203e-01, PNorm = 47.8718, GNorm = 3.6798, lr_0 = 1.2287e-04
Loss = 8.6019e-01, PNorm = 47.8770, GNorm = 3.5145, lr_0 = 1.2663e-04
Loss = 7.4501e-01, PNorm = 47.8835, GNorm = 2.6984, lr_0 = 1.3038e-04
Loss = 7.0158e-01, PNorm = 47.8911, GNorm = 4.2749, lr_0 = 1.3413e-04
Loss = 6.4422e-01, PNorm = 47.8981, GNorm = 7.4713, lr_0 = 1.3788e-04
Loss = 6.4392e-01, PNorm = 47.9037, GNorm = 11.6331, lr_0 = 1.4163e-04
Loss = 6.4674e-01, PNorm = 47.9099, GNorm = 3.9170, lr_0 = 1.4537e-04
Loss = 5.4558e-01, PNorm = 47.9152, GNorm = 5.9411, lr_0 = 1.4913e-04
Loss = 5.3408e-01, PNorm = 47.9208, GNorm = 5.8568, lr_0 = 1.5288e-04
Loss = 5.5135e-01, PNorm = 47.9257, GNorm = 18.9205, lr_0 = 1.5662e-04
Loss = 4.9200e-01, PNorm = 47.9305, GNorm = 3.5307, lr_0 = 1.6038e-04
Loss = 4.9868e-01, PNorm = 47.9360, GNorm = 6.1693, lr_0 = 1.6412e-04
Loss = 3.9804e-01, PNorm = 47.9424, GNorm = 3.5986, lr_0 = 1.6788e-04
Loss = 4.7754e-01, PNorm = 47.9485, GNorm = 5.4833, lr_0 = 1.7163e-04
Loss = 4.9142e-01, PNorm = 47.9527, GNorm = 4.7317, lr_0 = 1.7538e-04
Loss = 4.9303e-01, PNorm = 47.9580, GNorm = 3.4063, lr_0 = 1.7913e-04
Loss = 4.6626e-01, PNorm = 47.9636, GNorm = 4.7682, lr_0 = 1.8288e-04
Loss = 4.7727e-01, PNorm = 47.9702, GNorm = 11.7897, lr_0 = 1.8662e-04
Loss = 4.0235e-01, PNorm = 47.9741, GNorm = 1.7361, lr_0 = 1.9038e-04
Loss = 4.6977e-01, PNorm = 47.9797, GNorm = 8.8259, lr_0 = 1.9413e-04
Loss = 4.3277e-01, PNorm = 47.9862, GNorm = 18.5991, lr_0 = 1.9788e-04
Loss = 3.6623e-01, PNorm = 47.9903, GNorm = 8.0662, lr_0 = 2.0163e-04
Loss = 4.1656e-01, PNorm = 47.9957, GNorm = 10.9414, lr_0 = 2.0537e-04
Loss = 4.1422e-01, PNorm = 48.0023, GNorm = 6.7644, lr_0 = 2.0913e-04
Loss = 3.5507e-01, PNorm = 48.0077, GNorm = 5.8915, lr_0 = 2.1288e-04
Loss = 3.5226e-01, PNorm = 48.0137, GNorm = 13.3345, lr_0 = 2.1663e-04
Loss = 3.9746e-01, PNorm = 48.0164, GNorm = 6.9963, lr_0 = 2.2038e-04
Loss = 3.8999e-01, PNorm = 48.0218, GNorm = 9.7848, lr_0 = 2.2412e-04
Loss = 3.4412e-01, PNorm = 48.0270, GNorm = 12.5158, lr_0 = 2.2787e-04
Loss = 3.4441e-01, PNorm = 48.0330, GNorm = 2.5869, lr_0 = 2.3163e-04
Loss = 3.5833e-01, PNorm = 48.0374, GNorm = 14.0759, lr_0 = 2.3538e-04
Loss = 3.4657e-01, PNorm = 48.0402, GNorm = 2.0533, lr_0 = 2.3913e-04
Loss = 3.8644e-01, PNorm = 48.0466, GNorm = 3.1262, lr_0 = 2.4288e-04
Loss = 3.8496e-01, PNorm = 48.0521, GNorm = 1.8303, lr_0 = 2.4662e-04
Loss = 3.4509e-01, PNorm = 48.0570, GNorm = 1.6705, lr_0 = 2.5038e-04
Loss = 3.5531e-01, PNorm = 48.0644, GNorm = 10.9515, lr_0 = 2.5413e-04
Loss = 3.5697e-01, PNorm = 48.0716, GNorm = 13.3853, lr_0 = 2.5788e-04
Loss = 3.3345e-01, PNorm = 48.0807, GNorm = 1.5902, lr_0 = 2.6163e-04
Loss = 3.2068e-01, PNorm = 48.0866, GNorm = 5.1956, lr_0 = 2.6537e-04
Loss = 3.3193e-01, PNorm = 48.0923, GNorm = 5.2244, lr_0 = 2.6912e-04
Loss = 3.3211e-01, PNorm = 48.0978, GNorm = 6.9228, lr_0 = 2.7288e-04
Loss = 3.7239e-01, PNorm = 48.1052, GNorm = 15.6060, lr_0 = 2.7663e-04
Loss = 3.4198e-01, PNorm = 48.1162, GNorm = 4.6504, lr_0 = 2.8038e-04
Loss = 3.3739e-01, PNorm = 48.1236, GNorm = 4.2692, lr_0 = 2.8413e-04
Loss = 3.5979e-01, PNorm = 48.1291, GNorm = 6.1020, lr_0 = 2.8787e-04
Loss = 2.9101e-01, PNorm = 48.1374, GNorm = 3.6766, lr_0 = 2.9163e-04
Loss = 3.5807e-01, PNorm = 48.1428, GNorm = 14.4531, lr_0 = 2.9538e-04
Loss = 3.0032e-01, PNorm = 48.1494, GNorm = 3.7577, lr_0 = 2.9913e-04
Loss = 3.0587e-01, PNorm = 48.1561, GNorm = 15.5343, lr_0 = 3.0288e-04
Loss = 3.2432e-01, PNorm = 48.1609, GNorm = 3.1868, lr_0 = 3.0662e-04
Loss = 3.7134e-01, PNorm = 48.1651, GNorm = 9.7681, lr_0 = 3.1037e-04
Loss = 2.9754e-01, PNorm = 48.1717, GNorm = 10.8414, lr_0 = 3.1413e-04
Loss = 3.4606e-01, PNorm = 48.1774, GNorm = 20.7789, lr_0 = 3.1788e-04
Loss = 4.1982e-01, PNorm = 48.1841, GNorm = 8.1874, lr_0 = 3.2163e-04
Loss = 4.0244e-01, PNorm = 48.1929, GNorm = 7.0458, lr_0 = 3.2538e-04
Loss = 3.6161e-01, PNorm = 48.2035, GNorm = 2.4716, lr_0 = 3.2912e-04
Loss = 3.6997e-01, PNorm = 48.2155, GNorm = 3.4251, lr_0 = 3.3288e-04
Loss = 3.1024e-01, PNorm = 48.2235, GNorm = 2.4875, lr_0 = 3.3663e-04
Loss = 2.7764e-01, PNorm = 48.2303, GNorm = 5.3377, lr_0 = 3.4038e-04
Loss = 2.6737e-01, PNorm = 48.2348, GNorm = 1.7333, lr_0 = 3.4413e-04
Loss = 3.1118e-01, PNorm = 48.2418, GNorm = 2.4889, lr_0 = 3.4787e-04
Loss = 3.4826e-01, PNorm = 48.2485, GNorm = 6.7579, lr_0 = 3.5162e-04
Loss = 3.4099e-01, PNorm = 48.2547, GNorm = 4.9309, lr_0 = 3.5538e-04
Loss = 3.7069e-01, PNorm = 48.2637, GNorm = 1.6744, lr_0 = 3.5913e-04
Loss = 3.4216e-01, PNorm = 48.2755, GNorm = 6.8067, lr_0 = 3.6288e-04
Loss = 3.2356e-01, PNorm = 48.2840, GNorm = 5.8189, lr_0 = 3.6662e-04
Loss = 3.2621e-01, PNorm = 48.2893, GNorm = 2.5977, lr_0 = 3.7037e-04
Loss = 2.9917e-01, PNorm = 48.2998, GNorm = 3.5689, lr_0 = 3.7413e-04
Loss = 3.1269e-01, PNorm = 48.3079, GNorm = 7.6735, lr_0 = 3.7788e-04
Loss = 2.8102e-01, PNorm = 48.3156, GNorm = 3.0789, lr_0 = 3.8163e-04
Loss = 2.6928e-01, PNorm = 48.3235, GNorm = 4.3274, lr_0 = 3.8537e-04
Loss = 2.9671e-01, PNorm = 48.3296, GNorm = 4.5463, lr_0 = 3.8912e-04
Loss = 2.5658e-01, PNorm = 48.3371, GNorm = 2.6342, lr_0 = 3.9287e-04
Loss = 2.7612e-01, PNorm = 48.3413, GNorm = 3.0562, lr_0 = 3.9663e-04
Loss = 2.7142e-01, PNorm = 48.3480, GNorm = 8.5346, lr_0 = 4.0038e-04
Loss = 2.9356e-01, PNorm = 48.3532, GNorm = 1.7368, lr_0 = 4.0413e-04
Loss = 3.2691e-01, PNorm = 48.3613, GNorm = 8.8813, lr_0 = 4.0787e-04
Loss = 3.4252e-01, PNorm = 48.3711, GNorm = 6.2075, lr_0 = 4.1162e-04
Loss = 2.9584e-01, PNorm = 48.3754, GNorm = 3.0989, lr_0 = 4.1537e-04
Loss = 2.9415e-01, PNorm = 48.3847, GNorm = 4.5735, lr_0 = 4.1913e-04
Loss = 3.2814e-01, PNorm = 48.3920, GNorm = 5.3274, lr_0 = 4.2288e-04
Loss = 3.0256e-01, PNorm = 48.4015, GNorm = 7.9098, lr_0 = 4.2662e-04
Loss = 3.9743e-01, PNorm = 48.4141, GNorm = 13.9677, lr_0 = 4.3037e-04
Loss = 3.1609e-01, PNorm = 48.4302, GNorm = 4.6690, lr_0 = 4.3412e-04
Loss = 3.2163e-01, PNorm = 48.4387, GNorm = 11.3384, lr_0 = 4.3788e-04
Loss = 2.8496e-01, PNorm = 48.4461, GNorm = 1.2566, lr_0 = 4.4163e-04
Loss = 3.0264e-01, PNorm = 48.4514, GNorm = 5.5230, lr_0 = 4.4538e-04
Loss = 3.1392e-01, PNorm = 48.4585, GNorm = 7.6901, lr_0 = 4.4912e-04
Loss = 3.0342e-01, PNorm = 48.4653, GNorm = 4.9519, lr_0 = 4.5287e-04
Loss = 3.6972e-01, PNorm = 48.4788, GNorm = 8.2899, lr_0 = 4.5662e-04
Loss = 3.2791e-01, PNorm = 48.4927, GNorm = 5.7467, lr_0 = 4.6038e-04
Loss = 2.8434e-01, PNorm = 48.5023, GNorm = 2.7536, lr_0 = 4.6413e-04
Loss = 3.2125e-01, PNorm = 48.5099, GNorm = 8.2644, lr_0 = 4.6787e-04
Loss = 3.6719e-01, PNorm = 48.5201, GNorm = 8.1326, lr_0 = 4.7162e-04
Loss = 3.0141e-01, PNorm = 48.5318, GNorm = 3.5842, lr_0 = 4.7537e-04
Loss = 2.8657e-01, PNorm = 48.5418, GNorm = 1.2031, lr_0 = 4.7913e-04
Loss = 2.5660e-01, PNorm = 48.5554, GNorm = 2.7880, lr_0 = 4.8288e-04
Loss = 2.4831e-01, PNorm = 48.5636, GNorm = 4.1630, lr_0 = 4.8663e-04
Loss = 3.0133e-01, PNorm = 48.5707, GNorm = 2.2194, lr_0 = 4.9038e-04
Loss = 2.7106e-01, PNorm = 48.5817, GNorm = 2.2864, lr_0 = 4.9412e-04
Loss = 2.7764e-01, PNorm = 48.5884, GNorm = 0.8901, lr_0 = 4.9788e-04
Loss = 2.9865e-01, PNorm = 48.6002, GNorm = 2.8759, lr_0 = 5.0163e-04
Loss = 2.7581e-01, PNorm = 48.6065, GNorm = 3.7484, lr_0 = 5.0538e-04
Loss = 3.0072e-01, PNorm = 48.6172, GNorm = 3.0263, lr_0 = 5.0913e-04
Loss = 2.5362e-01, PNorm = 48.6332, GNorm = 2.6970, lr_0 = 5.1287e-04
Loss = 2.6101e-01, PNorm = 48.6434, GNorm = 3.5222, lr_0 = 5.1663e-04
Loss = 2.8081e-01, PNorm = 48.6505, GNorm = 5.4044, lr_0 = 5.2038e-04
Loss = 2.8125e-01, PNorm = 48.6632, GNorm = 5.6302, lr_0 = 5.2413e-04
Loss = 2.7821e-01, PNorm = 48.6718, GNorm = 1.8698, lr_0 = 5.2788e-04
Loss = 2.7426e-01, PNorm = 48.6836, GNorm = 4.0799, lr_0 = 5.3162e-04
Loss = 3.0600e-01, PNorm = 48.6950, GNorm = 2.9450, lr_0 = 5.3538e-04
Loss = 3.1301e-01, PNorm = 48.7034, GNorm = 4.6647, lr_0 = 5.3912e-04
Loss = 2.8046e-01, PNorm = 48.7168, GNorm = 1.3831, lr_0 = 5.4288e-04
Loss = 2.6389e-01, PNorm = 48.7244, GNorm = 4.0464, lr_0 = 5.4663e-04
Loss = 2.5514e-01, PNorm = 48.7321, GNorm = 3.9238, lr_0 = 5.5038e-04
Validation mae = 0.320012
Epoch 1
Loss = 3.4238e-01, PNorm = 48.7444, GNorm = 7.4010, lr_0 = 5.5413e-04
Loss = 3.5889e-01, PNorm = 48.7620, GNorm = 5.7951, lr_0 = 5.5787e-04
Loss = 2.8341e-01, PNorm = 48.7822, GNorm = 1.3843, lr_0 = 5.6163e-04
Loss = 2.6520e-01, PNorm = 48.7974, GNorm = 7.6542, lr_0 = 5.6538e-04
Loss = 2.7638e-01, PNorm = 48.8107, GNorm = 1.4346, lr_0 = 5.6913e-04
Loss = 3.0526e-01, PNorm = 48.8237, GNorm = 6.1078, lr_0 = 5.7288e-04
Loss = 3.2330e-01, PNorm = 48.8283, GNorm = 7.0765, lr_0 = 5.7662e-04
Loss = 3.2513e-01, PNorm = 48.8428, GNorm = 3.9335, lr_0 = 5.8038e-04
Loss = 2.8920e-01, PNorm = 48.8600, GNorm = 7.6173, lr_0 = 5.8413e-04
Loss = 3.0599e-01, PNorm = 48.8758, GNorm = 3.4144, lr_0 = 5.8788e-04
Loss = 2.6340e-01, PNorm = 48.8896, GNorm = 7.1013, lr_0 = 5.9163e-04
Loss = 2.4604e-01, PNorm = 48.8995, GNorm = 1.8659, lr_0 = 5.9538e-04
Loss = 2.6994e-01, PNorm = 48.9079, GNorm = 4.8457, lr_0 = 5.9913e-04
Loss = 2.8080e-01, PNorm = 48.9171, GNorm = 3.8036, lr_0 = 6.0288e-04
Loss = 3.0792e-01, PNorm = 48.9294, GNorm = 2.2487, lr_0 = 6.0663e-04
Loss = 2.4380e-01, PNorm = 48.9385, GNorm = 2.8199, lr_0 = 6.1038e-04
Loss = 2.5116e-01, PNorm = 48.9480, GNorm = 2.4575, lr_0 = 6.1413e-04
Loss = 2.7872e-01, PNorm = 48.9557, GNorm = 6.9894, lr_0 = 6.1788e-04
Loss = 2.7065e-01, PNorm = 48.9697, GNorm = 3.3676, lr_0 = 6.2163e-04
Loss = 3.0502e-01, PNorm = 48.9797, GNorm = 1.5903, lr_0 = 6.2538e-04
Loss = 2.7957e-01, PNorm = 48.9950, GNorm = 4.0867, lr_0 = 6.2913e-04
Loss = 2.7719e-01, PNorm = 49.0117, GNorm = 1.1188, lr_0 = 6.3288e-04
Loss = 2.4275e-01, PNorm = 49.0284, GNorm = 2.0762, lr_0 = 6.3663e-04
Loss = 2.6146e-01, PNorm = 49.0362, GNorm = 0.7624, lr_0 = 6.4038e-04
Loss = 2.6586e-01, PNorm = 49.0443, GNorm = 5.4718, lr_0 = 6.4413e-04
Loss = 2.2865e-01, PNorm = 49.0572, GNorm = 5.6846, lr_0 = 6.4788e-04
Loss = 3.3900e-01, PNorm = 49.0724, GNorm = 8.6449, lr_0 = 6.5163e-04
Loss = 3.4334e-01, PNorm = 49.0890, GNorm = 1.8752, lr_0 = 6.5538e-04
Loss = 3.4931e-01, PNorm = 49.1052, GNorm = 5.0161, lr_0 = 6.5913e-04
Loss = 2.9337e-01, PNorm = 49.1303, GNorm = 1.6932, lr_0 = 6.6288e-04
Loss = 2.8633e-01, PNorm = 49.1433, GNorm = 1.7071, lr_0 = 6.6663e-04
Loss = 2.5347e-01, PNorm = 49.1563, GNorm = 3.1278, lr_0 = 6.7038e-04
Loss = 2.6596e-01, PNorm = 49.1659, GNorm = 1.7104, lr_0 = 6.7413e-04
Loss = 2.6830e-01, PNorm = 49.1762, GNorm = 1.0556, lr_0 = 6.7788e-04
Loss = 2.8895e-01, PNorm = 49.1974, GNorm = 2.8469, lr_0 = 6.8163e-04
Loss = 2.8300e-01, PNorm = 49.2124, GNorm = 3.6391, lr_0 = 6.8538e-04
Loss = 2.9333e-01, PNorm = 49.2309, GNorm = 1.6062, lr_0 = 6.8913e-04
Loss = 2.7141e-01, PNorm = 49.2418, GNorm = 3.9662, lr_0 = 6.9288e-04
Loss = 2.7279e-01, PNorm = 49.2567, GNorm = 4.4307, lr_0 = 6.9663e-04
Loss = 2.8633e-01, PNorm = 49.2671, GNorm = 4.3682, lr_0 = 7.0038e-04
Loss = 2.9595e-01, PNorm = 49.2838, GNorm = 3.9871, lr_0 = 7.0413e-04
Loss = 2.7670e-01, PNorm = 49.3035, GNorm = 2.0679, lr_0 = 7.0788e-04
Loss = 2.7923e-01, PNorm = 49.3259, GNorm = 3.2284, lr_0 = 7.1163e-04
Loss = 2.8232e-01, PNorm = 49.3399, GNorm = 1.0973, lr_0 = 7.1538e-04
Loss = 2.2664e-01, PNorm = 49.3554, GNorm = 2.9269, lr_0 = 7.1913e-04
Loss = 2.3846e-01, PNorm = 49.3674, GNorm = 1.1442, lr_0 = 7.2288e-04
Loss = 2.6949e-01, PNorm = 49.3723, GNorm = 2.9777, lr_0 = 7.2663e-04
Loss = 3.2205e-01, PNorm = 49.3932, GNorm = 0.8724, lr_0 = 7.3038e-04
Loss = 2.6676e-01, PNorm = 49.4127, GNorm = 3.8181, lr_0 = 7.3413e-04
Loss = 2.4422e-01, PNorm = 49.4295, GNorm = 0.9532, lr_0 = 7.3788e-04
Loss = 2.4678e-01, PNorm = 49.4405, GNorm = 4.2973, lr_0 = 7.4163e-04
Loss = 2.2802e-01, PNorm = 49.4554, GNorm = 0.9150, lr_0 = 7.4538e-04
Loss = 2.6171e-01, PNorm = 49.4596, GNorm = 0.7272, lr_0 = 7.4913e-04
Loss = 2.3766e-01, PNorm = 49.4769, GNorm = 3.1247, lr_0 = 7.5288e-04
Loss = 2.3740e-01, PNorm = 49.4972, GNorm = 4.4050, lr_0 = 7.5663e-04
Loss = 2.3779e-01, PNorm = 49.5164, GNorm = 4.0445, lr_0 = 7.6038e-04
Loss = 2.2517e-01, PNorm = 49.5340, GNorm = 1.0879, lr_0 = 7.6413e-04
Loss = 2.9226e-01, PNorm = 49.5503, GNorm = 2.4281, lr_0 = 7.6788e-04
Loss = 2.5322e-01, PNorm = 49.5719, GNorm = 8.6820, lr_0 = 7.7163e-04
Loss = 2.5013e-01, PNorm = 49.5924, GNorm = 2.8739, lr_0 = 7.7538e-04
Loss = 2.6110e-01, PNorm = 49.6073, GNorm = 1.4342, lr_0 = 7.7913e-04
Loss = 2.4800e-01, PNorm = 49.6255, GNorm = 1.1729, lr_0 = 7.8288e-04
Loss = 2.3701e-01, PNorm = 49.6360, GNorm = 0.6318, lr_0 = 7.8663e-04
Loss = 2.4132e-01, PNorm = 49.6532, GNorm = 4.9695, lr_0 = 7.9038e-04
Loss = 2.7599e-01, PNorm = 49.6688, GNorm = 2.2616, lr_0 = 7.9413e-04
Loss = 2.6669e-01, PNorm = 49.6880, GNorm = 2.1404, lr_0 = 7.9788e-04
Loss = 2.2321e-01, PNorm = 49.6999, GNorm = 1.1193, lr_0 = 8.0163e-04
Loss = 2.6029e-01, PNorm = 49.7119, GNorm = 2.8029, lr_0 = 8.0538e-04
Loss = 2.5412e-01, PNorm = 49.7289, GNorm = 1.3356, lr_0 = 8.0913e-04
Loss = 2.4817e-01, PNorm = 49.7544, GNorm = 1.8922, lr_0 = 8.1288e-04
Loss = 2.9512e-01, PNorm = 49.7655, GNorm = 1.4379, lr_0 = 8.1663e-04
Loss = 2.6201e-01, PNorm = 49.7775, GNorm = 0.8368, lr_0 = 8.2038e-04
Loss = 2.5647e-01, PNorm = 49.7947, GNorm = 5.3087, lr_0 = 8.2413e-04
Loss = 2.8617e-01, PNorm = 49.8088, GNorm = 2.7882, lr_0 = 8.2788e-04
Loss = 3.0526e-01, PNorm = 49.8435, GNorm = 1.1957, lr_0 = 8.3163e-04
Loss = 2.9262e-01, PNorm = 49.8753, GNorm = 1.5326, lr_0 = 8.3538e-04
Loss = 2.5474e-01, PNorm = 49.9023, GNorm = 2.0969, lr_0 = 8.3913e-04
Loss = 2.6479e-01, PNorm = 49.9231, GNorm = 0.6646, lr_0 = 8.4288e-04
Loss = 2.5517e-01, PNorm = 49.9432, GNorm = 2.6261, lr_0 = 8.4663e-04
Loss = 2.5315e-01, PNorm = 49.9576, GNorm = 1.2868, lr_0 = 8.5038e-04
Loss = 2.5255e-01, PNorm = 49.9738, GNorm = 3.1547, lr_0 = 8.5413e-04
Loss = 2.3159e-01, PNorm = 50.0027, GNorm = 2.2718, lr_0 = 8.5788e-04
Loss = 2.7851e-01, PNorm = 50.0215, GNorm = 2.0539, lr_0 = 8.6163e-04
Loss = 2.7262e-01, PNorm = 50.0425, GNorm = 4.1066, lr_0 = 8.6538e-04
Loss = 2.5972e-01, PNorm = 50.0618, GNorm = 1.7243, lr_0 = 8.6913e-04
Loss = 2.9958e-01, PNorm = 50.0802, GNorm = 5.4319, lr_0 = 8.7288e-04
Loss = 2.4905e-01, PNorm = 50.1007, GNorm = 3.0511, lr_0 = 8.7663e-04
Loss = 2.4966e-01, PNorm = 50.1197, GNorm = 0.8788, lr_0 = 8.8038e-04
Loss = 2.1554e-01, PNorm = 50.1420, GNorm = 1.3072, lr_0 = 8.8413e-04
Loss = 3.0739e-01, PNorm = 50.1560, GNorm = 0.7960, lr_0 = 8.8788e-04
Loss = 2.4936e-01, PNorm = 50.1709, GNorm = 0.9072, lr_0 = 8.9163e-04
Loss = 2.7856e-01, PNorm = 50.1956, GNorm = 2.6770, lr_0 = 8.9538e-04
Loss = 2.3249e-01, PNorm = 50.2188, GNorm = 4.7793, lr_0 = 8.9913e-04
Loss = 2.6608e-01, PNorm = 50.2504, GNorm = 2.3129, lr_0 = 9.0288e-04
Loss = 2.6672e-01, PNorm = 50.2710, GNorm = 0.9655, lr_0 = 9.0663e-04
Loss = 2.5408e-01, PNorm = 50.2895, GNorm = 5.3747, lr_0 = 9.1038e-04
Loss = 3.1716e-01, PNorm = 50.3099, GNorm = 1.8928, lr_0 = 9.1413e-04
Loss = 3.1679e-01, PNorm = 50.3319, GNorm = 2.1422, lr_0 = 9.1788e-04
Loss = 2.8644e-01, PNorm = 50.3692, GNorm = 4.7310, lr_0 = 9.2163e-04
Loss = 2.4094e-01, PNorm = 50.3912, GNorm = 0.9181, lr_0 = 9.2538e-04
Loss = 2.6835e-01, PNorm = 50.4220, GNorm = 2.4803, lr_0 = 9.2913e-04
Loss = 2.3994e-01, PNorm = 50.4425, GNorm = 0.7414, lr_0 = 9.3288e-04
Loss = 2.4560e-01, PNorm = 50.4625, GNorm = 3.8479, lr_0 = 9.3663e-04
Loss = 2.3093e-01, PNorm = 50.4954, GNorm = 0.8252, lr_0 = 9.4038e-04
Loss = 2.6972e-01, PNorm = 50.5228, GNorm = 3.3309, lr_0 = 9.4413e-04
Loss = 2.7587e-01, PNorm = 50.5414, GNorm = 1.6627, lr_0 = 9.4788e-04
Loss = 2.6542e-01, PNorm = 50.5706, GNorm = 2.0863, lr_0 = 9.5163e-04
Loss = 2.3922e-01, PNorm = 50.5990, GNorm = 5.1403, lr_0 = 9.5538e-04
Loss = 2.7454e-01, PNorm = 50.6230, GNorm = 1.9488, lr_0 = 9.5913e-04
Loss = 2.8323e-01, PNorm = 50.6520, GNorm = 3.3825, lr_0 = 9.6288e-04
Loss = 2.6156e-01, PNorm = 50.6848, GNorm = 1.9897, lr_0 = 9.6663e-04
Loss = 1.9864e-01, PNorm = 50.7043, GNorm = 1.6753, lr_0 = 9.7038e-04
Loss = 2.2670e-01, PNorm = 50.7199, GNorm = 2.8373, lr_0 = 9.7413e-04
Loss = 2.7082e-01, PNorm = 50.7342, GNorm = 4.8937, lr_0 = 9.7788e-04
Loss = 2.4820e-01, PNorm = 50.7530, GNorm = 3.8939, lr_0 = 9.8163e-04
Loss = 2.6508e-01, PNorm = 50.7633, GNorm = 2.6352, lr_0 = 9.8537e-04
Loss = 2.8188e-01, PNorm = 50.7921, GNorm = 2.9559, lr_0 = 9.8912e-04
Loss = 2.3985e-01, PNorm = 50.8109, GNorm = 3.5852, lr_0 = 9.9288e-04
Loss = 2.5043e-01, PNorm = 50.8408, GNorm = 3.0127, lr_0 = 9.9663e-04
Loss = 2.4246e-01, PNorm = 50.8684, GNorm = 2.4664, lr_0 = 9.9993e-04
Validation mae = 0.311564
Epoch 2
Loss = 2.5150e-01, PNorm = 50.8923, GNorm = 4.2020, lr_0 = 9.9925e-04
Loss = 2.5557e-01, PNorm = 50.9279, GNorm = 1.0266, lr_0 = 9.9856e-04
Loss = 2.1195e-01, PNorm = 50.9473, GNorm = 0.7111, lr_0 = 9.9788e-04
Loss = 2.4595e-01, PNorm = 50.9688, GNorm = 1.4466, lr_0 = 9.9719e-04
Loss = 2.3678e-01, PNorm = 50.9939, GNorm = 1.2887, lr_0 = 9.9651e-04
Loss = 2.4770e-01, PNorm = 51.0287, GNorm = 1.4783, lr_0 = 9.9583e-04
Loss = 2.5383e-01, PNorm = 51.0573, GNorm = 3.3969, lr_0 = 9.9515e-04
Loss = 2.5560e-01, PNorm = 51.0912, GNorm = 2.1058, lr_0 = 9.9446e-04
Loss = 2.4944e-01, PNorm = 51.1263, GNorm = 1.1321, lr_0 = 9.9378e-04
Loss = 2.4034e-01, PNorm = 51.1465, GNorm = 1.6781, lr_0 = 9.9310e-04
Loss = 2.7821e-01, PNorm = 51.1708, GNorm = 2.2036, lr_0 = 9.9242e-04
Loss = 2.7361e-01, PNorm = 51.2112, GNorm = 2.7759, lr_0 = 9.9174e-04
Loss = 2.5170e-01, PNorm = 51.2307, GNorm = 1.3307, lr_0 = 9.9106e-04
Loss = 2.8806e-01, PNorm = 51.2633, GNorm = 3.5185, lr_0 = 9.9038e-04
Loss = 2.8355e-01, PNorm = 51.2866, GNorm = 3.7715, lr_0 = 9.8971e-04
Loss = 2.4948e-01, PNorm = 51.3181, GNorm = 1.4180, lr_0 = 9.8903e-04
Loss = 2.7077e-01, PNorm = 51.3475, GNorm = 1.8147, lr_0 = 9.8835e-04
Loss = 2.6764e-01, PNorm = 51.3813, GNorm = 1.4608, lr_0 = 9.8767e-04
Loss = 2.6739e-01, PNorm = 51.4102, GNorm = 1.1605, lr_0 = 9.8700e-04
Loss = 2.5413e-01, PNorm = 51.4402, GNorm = 2.0188, lr_0 = 9.8632e-04
Loss = 2.7731e-01, PNorm = 51.4594, GNorm = 1.4678, lr_0 = 9.8564e-04
Loss = 2.2351e-01, PNorm = 51.4867, GNorm = 2.2672, lr_0 = 9.8497e-04
Loss = 2.1522e-01, PNorm = 51.5202, GNorm = 3.6208, lr_0 = 9.8429e-04
Loss = 2.4022e-01, PNorm = 51.5432, GNorm = 1.1970, lr_0 = 9.8362e-04
Loss = 2.5458e-01, PNorm = 51.5741, GNorm = 1.0657, lr_0 = 9.8295e-04
Loss = 2.4915e-01, PNorm = 51.5947, GNorm = 0.9698, lr_0 = 9.8227e-04
Loss = 2.2167e-01, PNorm = 51.6302, GNorm = 3.1434, lr_0 = 9.8160e-04
Loss = 2.5557e-01, PNorm = 51.6593, GNorm = 3.3085, lr_0 = 9.8093e-04
Loss = 2.4551e-01, PNorm = 51.6944, GNorm = 0.9857, lr_0 = 9.8026e-04
Loss = 2.2805e-01, PNorm = 51.7154, GNorm = 1.4620, lr_0 = 9.7958e-04
Loss = 2.3853e-01, PNorm = 51.7461, GNorm = 0.5009, lr_0 = 9.7891e-04
Loss = 2.2812e-01, PNorm = 51.7677, GNorm = 1.2670, lr_0 = 9.7824e-04
Loss = 2.3221e-01, PNorm = 51.7830, GNorm = 0.6852, lr_0 = 9.7757e-04
Loss = 2.2837e-01, PNorm = 51.8056, GNorm = 1.9885, lr_0 = 9.7690e-04
Loss = 2.5389e-01, PNorm = 51.8186, GNorm = 2.2912, lr_0 = 9.7623e-04
Loss = 2.3664e-01, PNorm = 51.8386, GNorm = 6.0006, lr_0 = 9.7556e-04
Loss = 2.6468e-01, PNorm = 51.8572, GNorm = 3.7090, lr_0 = 9.7490e-04
Loss = 2.3858e-01, PNorm = 51.8886, GNorm = 1.0814, lr_0 = 9.7423e-04
Loss = 2.3084e-01, PNorm = 51.9087, GNorm = 0.9494, lr_0 = 9.7356e-04
Loss = 2.0921e-01, PNorm = 51.9313, GNorm = 1.0604, lr_0 = 9.7289e-04
Loss = 2.5921e-01, PNorm = 51.9512, GNorm = 1.3272, lr_0 = 9.7223e-04
Loss = 2.3242e-01, PNorm = 51.9674, GNorm = 0.9464, lr_0 = 9.7156e-04
Loss = 2.2115e-01, PNorm = 51.9864, GNorm = 1.8380, lr_0 = 9.7090e-04
Loss = 2.3169e-01, PNorm = 52.0129, GNorm = 2.1373, lr_0 = 9.7023e-04
Loss = 2.4510e-01, PNorm = 52.0418, GNorm = 2.6965, lr_0 = 9.6957e-04
Loss = 2.3230e-01, PNorm = 52.0730, GNorm = 0.8591, lr_0 = 9.6890e-04
Loss = 2.2654e-01, PNorm = 52.1037, GNorm = 0.9501, lr_0 = 9.6824e-04
Loss = 2.4619e-01, PNorm = 52.1320, GNorm = 1.0547, lr_0 = 9.6757e-04
Loss = 2.1975e-01, PNorm = 52.1499, GNorm = 3.1336, lr_0 = 9.6691e-04
Loss = 2.5586e-01, PNorm = 52.1654, GNorm = 2.2438, lr_0 = 9.6625e-04
Loss = 2.4118e-01, PNorm = 52.1965, GNorm = 3.5818, lr_0 = 9.6559e-04
Loss = 2.3607e-01, PNorm = 52.2164, GNorm = 2.2735, lr_0 = 9.6493e-04
Loss = 2.4523e-01, PNorm = 52.2447, GNorm = 2.4820, lr_0 = 9.6427e-04
Loss = 2.2011e-01, PNorm = 52.2694, GNorm = 3.7111, lr_0 = 9.6360e-04
Loss = 2.5681e-01, PNorm = 52.3003, GNorm = 1.7815, lr_0 = 9.6294e-04
Loss = 2.4567e-01, PNorm = 52.3256, GNorm = 1.9890, lr_0 = 9.6228e-04
Loss = 2.3414e-01, PNorm = 52.3516, GNorm = 0.7067, lr_0 = 9.6163e-04
Loss = 2.3818e-01, PNorm = 52.3713, GNorm = 3.8296, lr_0 = 9.6097e-04
Loss = 2.2788e-01, PNorm = 52.3948, GNorm = 1.0961, lr_0 = 9.6031e-04
Loss = 1.9379e-01, PNorm = 52.4114, GNorm = 0.9260, lr_0 = 9.5965e-04
Loss = 2.4053e-01, PNorm = 52.4275, GNorm = 0.9638, lr_0 = 9.5899e-04
Loss = 2.0442e-01, PNorm = 52.4463, GNorm = 2.0815, lr_0 = 9.5834e-04
Loss = 2.4872e-01, PNorm = 52.4636, GNorm = 0.8537, lr_0 = 9.5768e-04
Loss = 2.0816e-01, PNorm = 52.4812, GNorm = 0.9278, lr_0 = 9.5702e-04
Loss = 2.1094e-01, PNorm = 52.5074, GNorm = 2.2674, lr_0 = 9.5637e-04
Loss = 2.1327e-01, PNorm = 52.5211, GNorm = 0.9990, lr_0 = 9.5571e-04
Loss = 2.2031e-01, PNorm = 52.5484, GNorm = 4.3836, lr_0 = 9.5506e-04
Loss = 2.3673e-01, PNorm = 52.5793, GNorm = 0.8562, lr_0 = 9.5440e-04
Loss = 2.0209e-01, PNorm = 52.6000, GNorm = 0.9059, lr_0 = 9.5375e-04
Loss = 2.1755e-01, PNorm = 52.6252, GNorm = 0.9117, lr_0 = 9.5310e-04
Loss = 2.4412e-01, PNorm = 52.6442, GNorm = 2.0933, lr_0 = 9.5244e-04
Loss = 2.4390e-01, PNorm = 52.6792, GNorm = 1.7418, lr_0 = 9.5179e-04
Loss = 2.2728e-01, PNorm = 52.7091, GNorm = 1.1295, lr_0 = 9.5114e-04
Loss = 2.2617e-01, PNorm = 52.7352, GNorm = 0.6753, lr_0 = 9.5049e-04
Loss = 2.4817e-01, PNorm = 52.7654, GNorm = 2.0354, lr_0 = 9.4984e-04
Loss = 2.2151e-01, PNorm = 52.7889, GNorm = 0.8840, lr_0 = 9.4919e-04
Loss = 2.5088e-01, PNorm = 52.8241, GNorm = 1.6842, lr_0 = 9.4854e-04
Loss = 2.3673e-01, PNorm = 52.8586, GNorm = 2.2986, lr_0 = 9.4789e-04
Loss = 2.2087e-01, PNorm = 52.8884, GNorm = 0.8443, lr_0 = 9.4724e-04
Loss = 2.4597e-01, PNorm = 52.9066, GNorm = 1.0640, lr_0 = 9.4659e-04
Loss = 2.3487e-01, PNorm = 52.9367, GNorm = 1.2337, lr_0 = 9.4594e-04
Loss = 2.4731e-01, PNorm = 52.9544, GNorm = 0.8547, lr_0 = 9.4529e-04
Loss = 2.3479e-01, PNorm = 52.9826, GNorm = 1.1625, lr_0 = 9.4464e-04
Loss = 2.0504e-01, PNorm = 52.9945, GNorm = 1.1234, lr_0 = 9.4400e-04
Loss = 2.0339e-01, PNorm = 53.0158, GNorm = 1.2704, lr_0 = 9.4335e-04
Loss = 1.9708e-01, PNorm = 53.0346, GNorm = 1.7227, lr_0 = 9.4270e-04
Loss = 2.2323e-01, PNorm = 53.0540, GNorm = 3.4993, lr_0 = 9.4206e-04
Loss = 2.0392e-01, PNorm = 53.0754, GNorm = 1.0383, lr_0 = 9.4141e-04
Loss = 1.9957e-01, PNorm = 53.0972, GNorm = 0.9799, lr_0 = 9.4077e-04
Loss = 2.2701e-01, PNorm = 53.1123, GNorm = 1.1216, lr_0 = 9.4012e-04
Loss = 1.7935e-01, PNorm = 53.1302, GNorm = 2.1274, lr_0 = 9.3948e-04
Loss = 2.1157e-01, PNorm = 53.1489, GNorm = 2.5787, lr_0 = 9.3884e-04
Loss = 2.2308e-01, PNorm = 53.1772, GNorm = 1.9876, lr_0 = 9.3819e-04
Loss = 2.4335e-01, PNorm = 53.2106, GNorm = 0.8644, lr_0 = 9.3755e-04
Loss = 2.1577e-01, PNorm = 53.2260, GNorm = 0.8019, lr_0 = 9.3691e-04
Loss = 2.0296e-01, PNorm = 53.2505, GNorm = 0.9651, lr_0 = 9.3627e-04
Loss = 2.1388e-01, PNorm = 53.2726, GNorm = 0.7703, lr_0 = 9.3562e-04
Loss = 2.1484e-01, PNorm = 53.2932, GNorm = 2.7529, lr_0 = 9.3498e-04
Loss = 2.0931e-01, PNorm = 53.3206, GNorm = 1.1366, lr_0 = 9.3434e-04
Loss = 2.3648e-01, PNorm = 53.3434, GNorm = 1.6463, lr_0 = 9.3370e-04
Loss = 1.8334e-01, PNorm = 53.3720, GNorm = 1.4318, lr_0 = 9.3306e-04
Loss = 2.4208e-01, PNorm = 53.3940, GNorm = 3.1340, lr_0 = 9.3242e-04
Loss = 2.2289e-01, PNorm = 53.4144, GNorm = 1.5841, lr_0 = 9.3178e-04
Loss = 2.2848e-01, PNorm = 53.4392, GNorm = 0.7276, lr_0 = 9.3115e-04
Loss = 2.3303e-01, PNorm = 53.4742, GNorm = 2.1513, lr_0 = 9.3051e-04
Loss = 2.1024e-01, PNorm = 53.5054, GNorm = 1.1197, lr_0 = 9.2987e-04
Loss = 2.1438e-01, PNorm = 53.5315, GNorm = 2.1644, lr_0 = 9.2923e-04
Loss = 2.2082e-01, PNorm = 53.5517, GNorm = 0.8607, lr_0 = 9.2860e-04
Loss = 2.1391e-01, PNorm = 53.5802, GNorm = 1.0999, lr_0 = 9.2796e-04
Loss = 2.1707e-01, PNorm = 53.6022, GNorm = 1.0821, lr_0 = 9.2733e-04
Loss = 2.3331e-01, PNorm = 53.6266, GNorm = 1.2648, lr_0 = 9.2669e-04
Loss = 2.4799e-01, PNorm = 53.6482, GNorm = 1.5956, lr_0 = 9.2606e-04
Loss = 2.6005e-01, PNorm = 53.6850, GNorm = 2.7902, lr_0 = 9.2542e-04
Loss = 2.4335e-01, PNorm = 53.7108, GNorm = 1.4131, lr_0 = 9.2479e-04
Loss = 2.4003e-01, PNorm = 53.7444, GNorm = 2.3043, lr_0 = 9.2415e-04
Loss = 2.3413e-01, PNorm = 53.7714, GNorm = 0.7123, lr_0 = 9.2352e-04
Loss = 2.3928e-01, PNorm = 53.8024, GNorm = 2.0721, lr_0 = 9.2289e-04
Loss = 1.7965e-01, PNorm = 53.8293, GNorm = 0.9315, lr_0 = 9.2226e-04
Loss = 2.1367e-01, PNorm = 53.8592, GNorm = 1.1014, lr_0 = 9.2162e-04
Loss = 1.9141e-01, PNorm = 53.8811, GNorm = 1.1994, lr_0 = 9.2099e-04
Validation mae = 0.272957
Epoch 3
Loss = 2.3600e-01, PNorm = 53.9069, GNorm = 1.5022, lr_0 = 9.2036e-04
Loss = 2.3057e-01, PNorm = 53.9379, GNorm = 2.6324, lr_0 = 9.1973e-04
Loss = 2.0275e-01, PNorm = 53.9650, GNorm = 0.6299, lr_0 = 9.1910e-04
Loss = 2.2310e-01, PNorm = 53.9819, GNorm = 2.6271, lr_0 = 9.1847e-04
Loss = 1.7595e-01, PNorm = 54.0084, GNorm = 1.1178, lr_0 = 9.1784e-04
Loss = 1.7603e-01, PNorm = 54.0354, GNorm = 0.8225, lr_0 = 9.1721e-04
Loss = 1.8697e-01, PNorm = 54.0595, GNorm = 0.6631, lr_0 = 9.1658e-04
Loss = 1.9602e-01, PNorm = 54.0835, GNorm = 1.2716, lr_0 = 9.1596e-04
Loss = 2.2113e-01, PNorm = 54.1081, GNorm = 0.9461, lr_0 = 9.1533e-04
Loss = 1.9576e-01, PNorm = 54.1442, GNorm = 0.8478, lr_0 = 9.1470e-04
Loss = 2.0589e-01, PNorm = 54.1633, GNorm = 1.1542, lr_0 = 9.1408e-04
Loss = 2.0375e-01, PNorm = 54.1946, GNorm = 3.8342, lr_0 = 9.1345e-04
Loss = 2.0949e-01, PNorm = 54.2235, GNorm = 2.4177, lr_0 = 9.1282e-04
Loss = 1.6869e-01, PNorm = 54.2496, GNorm = 1.1708, lr_0 = 9.1220e-04
Loss = 1.9978e-01, PNorm = 54.2738, GNorm = 0.8835, lr_0 = 9.1157e-04
Loss = 2.2547e-01, PNorm = 54.2959, GNorm = 2.0496, lr_0 = 9.1095e-04
Loss = 2.4339e-01, PNorm = 54.3238, GNorm = 0.6398, lr_0 = 9.1032e-04
Loss = 2.0537e-01, PNorm = 54.3589, GNorm = 0.7164, lr_0 = 9.0970e-04
Loss = 2.2734e-01, PNorm = 54.3873, GNorm = 1.6495, lr_0 = 9.0908e-04
Loss = 2.1701e-01, PNorm = 54.4160, GNorm = 0.7408, lr_0 = 9.0846e-04
Loss = 1.7496e-01, PNorm = 54.4382, GNorm = 1.3234, lr_0 = 9.0783e-04
Loss = 1.9849e-01, PNorm = 54.4656, GNorm = 0.7226, lr_0 = 9.0721e-04
Loss = 2.0825e-01, PNorm = 54.4900, GNorm = 2.8161, lr_0 = 9.0659e-04
Loss = 2.4315e-01, PNorm = 54.5169, GNorm = 1.2173, lr_0 = 9.0597e-04
Loss = 2.2544e-01, PNorm = 54.5344, GNorm = 1.0915, lr_0 = 9.0535e-04
Loss = 2.4072e-01, PNorm = 54.5678, GNorm = 4.7482, lr_0 = 9.0473e-04
Loss = 2.0927e-01, PNorm = 54.5962, GNorm = 0.7611, lr_0 = 9.0411e-04
Loss = 2.2841e-01, PNorm = 54.6162, GNorm = 1.0706, lr_0 = 9.0349e-04
Loss = 2.2629e-01, PNorm = 54.6378, GNorm = 1.0473, lr_0 = 9.0287e-04
Loss = 2.3363e-01, PNorm = 54.6724, GNorm = 1.0028, lr_0 = 9.0225e-04
Loss = 2.2241e-01, PNorm = 54.7114, GNorm = 1.0370, lr_0 = 9.0163e-04
Loss = 1.9481e-01, PNorm = 54.7391, GNorm = 1.5541, lr_0 = 9.0102e-04
Loss = 2.2293e-01, PNorm = 54.7667, GNorm = 1.6272, lr_0 = 9.0040e-04
Loss = 2.0240e-01, PNorm = 54.8009, GNorm = 0.8715, lr_0 = 8.9978e-04
Loss = 2.1776e-01, PNorm = 54.8340, GNorm = 2.5274, lr_0 = 8.9916e-04
Loss = 2.2596e-01, PNorm = 54.8673, GNorm = 1.3342, lr_0 = 8.9855e-04
Loss = 2.1677e-01, PNorm = 54.8971, GNorm = 2.9582, lr_0 = 8.9793e-04
Loss = 1.9940e-01, PNorm = 54.9236, GNorm = 1.5746, lr_0 = 8.9732e-04
Loss = 2.2406e-01, PNorm = 54.9477, GNorm = 1.3808, lr_0 = 8.9670e-04
Loss = 1.8788e-01, PNorm = 54.9725, GNorm = 0.8209, lr_0 = 8.9609e-04
Loss = 2.1504e-01, PNorm = 54.9953, GNorm = 0.7670, lr_0 = 8.9548e-04
Loss = 1.8206e-01, PNorm = 55.0145, GNorm = 1.9198, lr_0 = 8.9486e-04
Loss = 1.9505e-01, PNorm = 55.0427, GNorm = 0.6516, lr_0 = 8.9425e-04
Loss = 2.1419e-01, PNorm = 55.0751, GNorm = 1.2185, lr_0 = 8.9364e-04
Loss = 1.9459e-01, PNorm = 55.0935, GNorm = 0.6685, lr_0 = 8.9302e-04
Loss = 1.8706e-01, PNorm = 55.1078, GNorm = 2.6626, lr_0 = 8.9241e-04
Loss = 2.1357e-01, PNorm = 55.1204, GNorm = 3.2219, lr_0 = 8.9180e-04
Loss = 2.2406e-01, PNorm = 55.1469, GNorm = 0.7966, lr_0 = 8.9119e-04
Loss = 2.3005e-01, PNorm = 55.1771, GNorm = 1.0434, lr_0 = 8.9058e-04
Loss = 1.9375e-01, PNorm = 55.2098, GNorm = 1.4691, lr_0 = 8.8997e-04
Loss = 2.2701e-01, PNorm = 55.2459, GNorm = 2.2705, lr_0 = 8.8936e-04
Loss = 2.2743e-01, PNorm = 55.2790, GNorm = 1.5014, lr_0 = 8.8875e-04
Loss = 1.9288e-01, PNorm = 55.3089, GNorm = 2.8501, lr_0 = 8.8814e-04
Loss = 1.9383e-01, PNorm = 55.3313, GNorm = 1.3590, lr_0 = 8.8753e-04
Loss = 2.1616e-01, PNorm = 55.3598, GNorm = 1.0588, lr_0 = 8.8693e-04
Loss = 2.1471e-01, PNorm = 55.3823, GNorm = 0.8942, lr_0 = 8.8632e-04
Loss = 2.1395e-01, PNorm = 55.4034, GNorm = 0.7176, lr_0 = 8.8571e-04
Loss = 2.2443e-01, PNorm = 55.4335, GNorm = 1.1879, lr_0 = 8.8510e-04
Loss = 1.7521e-01, PNorm = 55.4545, GNorm = 0.8588, lr_0 = 8.8450e-04
Loss = 2.0263e-01, PNorm = 55.4803, GNorm = 1.6693, lr_0 = 8.8389e-04
Loss = 2.0463e-01, PNorm = 55.5073, GNorm = 0.8421, lr_0 = 8.8329e-04
Loss = 1.9139e-01, PNorm = 55.5371, GNorm = 0.6321, lr_0 = 8.8268e-04
Loss = 2.0643e-01, PNorm = 55.5574, GNorm = 0.8161, lr_0 = 8.8208e-04
Loss = 2.2378e-01, PNorm = 55.5825, GNorm = 1.4456, lr_0 = 8.8147e-04
Loss = 1.8566e-01, PNorm = 55.6055, GNorm = 0.8989, lr_0 = 8.8087e-04
Loss = 1.8532e-01, PNorm = 55.6293, GNorm = 1.6928, lr_0 = 8.8026e-04
Loss = 1.9923e-01, PNorm = 55.6538, GNorm = 0.7461, lr_0 = 8.7966e-04
Loss = 2.0227e-01, PNorm = 55.6787, GNorm = 1.2383, lr_0 = 8.7906e-04
Loss = 2.0535e-01, PNorm = 55.6947, GNorm = 1.8155, lr_0 = 8.7846e-04
Loss = 2.0743e-01, PNorm = 55.7191, GNorm = 1.1008, lr_0 = 8.7785e-04
Loss = 2.0789e-01, PNorm = 55.7466, GNorm = 1.3853, lr_0 = 8.7725e-04
Loss = 2.1820e-01, PNorm = 55.7792, GNorm = 1.6243, lr_0 = 8.7665e-04
Loss = 1.6814e-01, PNorm = 55.7977, GNorm = 0.7137, lr_0 = 8.7605e-04
Loss = 1.9936e-01, PNorm = 55.8224, GNorm = 0.9871, lr_0 = 8.7545e-04
Loss = 1.7236e-01, PNorm = 55.8445, GNorm = 1.1842, lr_0 = 8.7485e-04
Loss = 1.8646e-01, PNorm = 55.8649, GNorm = 0.9487, lr_0 = 8.7425e-04
Loss = 2.2080e-01, PNorm = 55.8915, GNorm = 1.4136, lr_0 = 8.7365e-04
Loss = 2.0717e-01, PNorm = 55.9185, GNorm = 0.8390, lr_0 = 8.7306e-04
Loss = 1.9215e-01, PNorm = 55.9531, GNorm = 1.0111, lr_0 = 8.7246e-04
Loss = 2.0301e-01, PNorm = 55.9788, GNorm = 0.6859, lr_0 = 8.7186e-04
Loss = 2.0468e-01, PNorm = 56.0072, GNorm = 1.9078, lr_0 = 8.7126e-04
Loss = 2.1828e-01, PNorm = 56.0337, GNorm = 1.7307, lr_0 = 8.7067e-04
Loss = 2.2069e-01, PNorm = 56.0668, GNorm = 1.5707, lr_0 = 8.7007e-04
Loss = 2.1468e-01, PNorm = 56.0967, GNorm = 2.8708, lr_0 = 8.6947e-04
Loss = 2.2022e-01, PNorm = 56.1308, GNorm = 0.7767, lr_0 = 8.6888e-04
Loss = 2.1505e-01, PNorm = 56.1610, GNorm = 0.8704, lr_0 = 8.6828e-04
Loss = 1.9410e-01, PNorm = 56.1867, GNorm = 1.3366, lr_0 = 8.6769e-04
Loss = 1.9944e-01, PNorm = 56.2120, GNorm = 2.4832, lr_0 = 8.6709e-04
Loss = 2.1595e-01, PNorm = 56.2354, GNorm = 1.0909, lr_0 = 8.6650e-04
Loss = 1.9273e-01, PNorm = 56.2694, GNorm = 1.6114, lr_0 = 8.6590e-04
Loss = 2.1788e-01, PNorm = 56.2881, GNorm = 1.4841, lr_0 = 8.6531e-04
Loss = 2.2408e-01, PNorm = 56.3058, GNorm = 0.7959, lr_0 = 8.6472e-04
Loss = 1.8125e-01, PNorm = 56.3297, GNorm = 1.3291, lr_0 = 8.6413e-04
Loss = 2.4463e-01, PNorm = 56.3508, GNorm = 3.4040, lr_0 = 8.6353e-04
Loss = 2.3692e-01, PNorm = 56.3785, GNorm = 1.9874, lr_0 = 8.6294e-04
Loss = 1.8974e-01, PNorm = 56.4152, GNorm = 1.7052, lr_0 = 8.6235e-04
Loss = 2.0955e-01, PNorm = 56.4415, GNorm = 2.3359, lr_0 = 8.6176e-04
Loss = 2.1576e-01, PNorm = 56.4690, GNorm = 2.6377, lr_0 = 8.6117e-04
Loss = 2.1319e-01, PNorm = 56.5018, GNorm = 2.3368, lr_0 = 8.6058e-04
Loss = 2.0216e-01, PNorm = 56.5261, GNorm = 1.4526, lr_0 = 8.5999e-04
Loss = 2.1320e-01, PNorm = 56.5625, GNorm = 1.3833, lr_0 = 8.5940e-04
Loss = 1.8134e-01, PNorm = 56.5858, GNorm = 0.6669, lr_0 = 8.5881e-04
Loss = 2.0103e-01, PNorm = 56.6118, GNorm = 2.1758, lr_0 = 8.5823e-04
Loss = 2.1300e-01, PNorm = 56.6362, GNorm = 1.0443, lr_0 = 8.5764e-04
Loss = 1.7822e-01, PNorm = 56.6616, GNorm = 0.9781, lr_0 = 8.5705e-04
Loss = 2.1800e-01, PNorm = 56.6831, GNorm = 1.4036, lr_0 = 8.5646e-04
Loss = 2.1400e-01, PNorm = 56.6909, GNorm = 1.1375, lr_0 = 8.5588e-04
Loss = 2.1077e-01, PNorm = 56.7200, GNorm = 0.9258, lr_0 = 8.5529e-04
Loss = 2.1637e-01, PNorm = 56.7424, GNorm = 1.7061, lr_0 = 8.5470e-04
Loss = 1.8183e-01, PNorm = 56.7732, GNorm = 1.1969, lr_0 = 8.5412e-04
Loss = 1.8675e-01, PNorm = 56.7980, GNorm = 0.9159, lr_0 = 8.5353e-04
Loss = 1.7640e-01, PNorm = 56.8219, GNorm = 1.1636, lr_0 = 8.5295e-04
Loss = 1.8223e-01, PNorm = 56.8457, GNorm = 0.5892, lr_0 = 8.5236e-04
Loss = 1.7942e-01, PNorm = 56.8673, GNorm = 0.9608, lr_0 = 8.5178e-04
Loss = 1.8389e-01, PNorm = 56.8766, GNorm = 1.1943, lr_0 = 8.5120e-04
Loss = 2.1136e-01, PNorm = 56.8896, GNorm = 0.5388, lr_0 = 8.5061e-04
Loss = 2.0479e-01, PNorm = 56.9142, GNorm = 1.6599, lr_0 = 8.5003e-04
Loss = 2.1273e-01, PNorm = 56.9398, GNorm = 0.7075, lr_0 = 8.4945e-04
Loss = 2.0029e-01, PNorm = 56.9594, GNorm = 1.7212, lr_0 = 8.4887e-04
Loss = 2.0993e-01, PNorm = 56.9818, GNorm = 1.1057, lr_0 = 8.4828e-04
Validation mae = 0.292474
Epoch 4
Loss = 2.2422e-01, PNorm = 57.0112, GNorm = 1.9451, lr_0 = 8.4770e-04
Loss = 1.7966e-01, PNorm = 57.0507, GNorm = 1.0181, lr_0 = 8.4712e-04
Loss = 2.0478e-01, PNorm = 57.0755, GNorm = 0.6675, lr_0 = 8.4654e-04
Loss = 1.7941e-01, PNorm = 57.1018, GNorm = 1.7333, lr_0 = 8.4596e-04
Loss = 2.0760e-01, PNorm = 57.1246, GNorm = 1.6308, lr_0 = 8.4538e-04
Loss = 2.1991e-01, PNorm = 57.1490, GNorm = 1.4198, lr_0 = 8.4480e-04
Loss = 2.0000e-01, PNorm = 57.1804, GNorm = 1.4244, lr_0 = 8.4423e-04
Loss = 2.1416e-01, PNorm = 57.2002, GNorm = 2.0571, lr_0 = 8.4365e-04
Loss = 1.9883e-01, PNorm = 57.2429, GNorm = 1.9281, lr_0 = 8.4307e-04
Loss = 2.1126e-01, PNorm = 57.2821, GNorm = 1.2223, lr_0 = 8.4249e-04
Loss = 2.3092e-01, PNorm = 57.3096, GNorm = 1.1206, lr_0 = 8.4191e-04
Loss = 1.9926e-01, PNorm = 57.3461, GNorm = 1.9403, lr_0 = 8.4134e-04
Loss = 1.9815e-01, PNorm = 57.3726, GNorm = 1.1284, lr_0 = 8.4076e-04
Loss = 1.8686e-01, PNorm = 57.3975, GNorm = 1.0711, lr_0 = 8.4019e-04
Loss = 1.9484e-01, PNorm = 57.4212, GNorm = 0.6460, lr_0 = 8.3961e-04
Loss = 1.8072e-01, PNorm = 57.4393, GNorm = 0.8122, lr_0 = 8.3903e-04
Loss = 1.9215e-01, PNorm = 57.4635, GNorm = 1.6925, lr_0 = 8.3846e-04
Loss = 1.8717e-01, PNorm = 57.4829, GNorm = 2.1074, lr_0 = 8.3789e-04
Loss = 1.8180e-01, PNorm = 57.5093, GNorm = 0.7738, lr_0 = 8.3731e-04
Loss = 1.7295e-01, PNorm = 57.5316, GNorm = 0.6145, lr_0 = 8.3674e-04
Loss = 1.7680e-01, PNorm = 57.5549, GNorm = 0.6970, lr_0 = 8.3616e-04
Loss = 1.7772e-01, PNorm = 57.5748, GNorm = 1.7120, lr_0 = 8.3559e-04
Loss = 1.6951e-01, PNorm = 57.5828, GNorm = 1.3553, lr_0 = 8.3502e-04
Loss = 1.8862e-01, PNorm = 57.6030, GNorm = 0.9746, lr_0 = 8.3445e-04
Loss = 1.8933e-01, PNorm = 57.6256, GNorm = 0.5500, lr_0 = 8.3388e-04
Loss = 1.9579e-01, PNorm = 57.6457, GNorm = 0.8868, lr_0 = 8.3330e-04
Loss = 1.8475e-01, PNorm = 57.6749, GNorm = 0.8275, lr_0 = 8.3273e-04
Loss = 1.8546e-01, PNorm = 57.6920, GNorm = 1.0663, lr_0 = 8.3216e-04
Loss = 2.0191e-01, PNorm = 57.7222, GNorm = 1.1588, lr_0 = 8.3159e-04
Loss = 2.0352e-01, PNorm = 57.7542, GNorm = 0.8800, lr_0 = 8.3102e-04
Loss = 1.9464e-01, PNorm = 57.7873, GNorm = 1.2077, lr_0 = 8.3045e-04
Loss = 2.1405e-01, PNorm = 57.8145, GNorm = 1.4150, lr_0 = 8.2988e-04
Loss = 2.0379e-01, PNorm = 57.8426, GNorm = 2.2152, lr_0 = 8.2932e-04
Loss = 2.1147e-01, PNorm = 57.8832, GNorm = 1.1237, lr_0 = 8.2875e-04
Loss = 1.8654e-01, PNorm = 57.9062, GNorm = 1.1987, lr_0 = 8.2818e-04
Loss = 1.7569e-01, PNorm = 57.9305, GNorm = 0.8603, lr_0 = 8.2761e-04
Loss = 1.8274e-01, PNorm = 57.9531, GNorm = 1.3300, lr_0 = 8.2705e-04
Loss = 1.8594e-01, PNorm = 57.9800, GNorm = 0.6274, lr_0 = 8.2648e-04
Loss = 1.9178e-01, PNorm = 57.9931, GNorm = 0.6802, lr_0 = 8.2591e-04
Loss = 1.6427e-01, PNorm = 58.0133, GNorm = 0.4995, lr_0 = 8.2535e-04
Loss = 1.8014e-01, PNorm = 58.0334, GNorm = 0.6231, lr_0 = 8.2478e-04
Loss = 1.7241e-01, PNorm = 58.0440, GNorm = 0.7881, lr_0 = 8.2422e-04
Loss = 1.9543e-01, PNorm = 58.0628, GNorm = 0.6014, lr_0 = 8.2365e-04
Loss = 1.9693e-01, PNorm = 58.0931, GNorm = 0.7340, lr_0 = 8.2309e-04
Loss = 2.0113e-01, PNorm = 58.1097, GNorm = 0.6528, lr_0 = 8.2252e-04
Loss = 1.7516e-01, PNorm = 58.1417, GNorm = 0.7737, lr_0 = 8.2196e-04
Loss = 2.1098e-01, PNorm = 58.1704, GNorm = 0.6823, lr_0 = 8.2140e-04
Loss = 1.9843e-01, PNorm = 58.2157, GNorm = 0.8841, lr_0 = 8.2084e-04
Loss = 2.0748e-01, PNorm = 58.2370, GNorm = 0.8489, lr_0 = 8.2027e-04
Loss = 2.1385e-01, PNorm = 58.2593, GNorm = 1.6459, lr_0 = 8.1971e-04
Loss = 1.8225e-01, PNorm = 58.2799, GNorm = 1.8702, lr_0 = 8.1915e-04
Loss = 1.8479e-01, PNorm = 58.3037, GNorm = 2.1830, lr_0 = 8.1859e-04
Loss = 2.0703e-01, PNorm = 58.3357, GNorm = 2.5520, lr_0 = 8.1803e-04
Loss = 1.9808e-01, PNorm = 58.3590, GNorm = 1.5568, lr_0 = 8.1747e-04
Loss = 1.9487e-01, PNorm = 58.3902, GNorm = 2.0022, lr_0 = 8.1691e-04
Loss = 1.8299e-01, PNorm = 58.4124, GNorm = 1.0202, lr_0 = 8.1635e-04
Loss = 1.9415e-01, PNorm = 58.4378, GNorm = 1.6567, lr_0 = 8.1579e-04
Loss = 1.9719e-01, PNorm = 58.4593, GNorm = 1.4906, lr_0 = 8.1523e-04
Loss = 1.7571e-01, PNorm = 58.4823, GNorm = 0.6218, lr_0 = 8.1467e-04
Loss = 1.8333e-01, PNorm = 58.4995, GNorm = 0.9862, lr_0 = 8.1411e-04
Loss = 1.8795e-01, PNorm = 58.5170, GNorm = 0.7296, lr_0 = 8.1355e-04
Loss = 2.1807e-01, PNorm = 58.5406, GNorm = 0.7099, lr_0 = 8.1300e-04
Loss = 1.7666e-01, PNorm = 58.5634, GNorm = 0.8798, lr_0 = 8.1244e-04
Loss = 2.2580e-01, PNorm = 58.5838, GNorm = 1.3187, lr_0 = 8.1188e-04
Loss = 1.7002e-01, PNorm = 58.6031, GNorm = 0.7390, lr_0 = 8.1133e-04
Loss = 1.6158e-01, PNorm = 58.6177, GNorm = 1.3387, lr_0 = 8.1077e-04
Loss = 1.9250e-01, PNorm = 58.6413, GNorm = 1.6067, lr_0 = 8.1022e-04
Loss = 1.7815e-01, PNorm = 58.6632, GNorm = 1.8726, lr_0 = 8.0966e-04
Loss = 2.3218e-01, PNorm = 58.6978, GNorm = 1.0708, lr_0 = 8.0911e-04
Loss = 1.9293e-01, PNorm = 58.7371, GNorm = 0.7660, lr_0 = 8.0855e-04
Loss = 1.8090e-01, PNorm = 58.7650, GNorm = 1.2078, lr_0 = 8.0800e-04
Loss = 1.9372e-01, PNorm = 58.7924, GNorm = 1.0701, lr_0 = 8.0745e-04
Loss = 1.6158e-01, PNorm = 58.8097, GNorm = 0.6255, lr_0 = 8.0689e-04
Loss = 1.6976e-01, PNorm = 58.8299, GNorm = 1.8075, lr_0 = 8.0634e-04
Loss = 1.7317e-01, PNorm = 58.8493, GNorm = 1.4426, lr_0 = 8.0579e-04
Loss = 1.8740e-01, PNorm = 58.8717, GNorm = 0.6742, lr_0 = 8.0523e-04
Loss = 1.7812e-01, PNorm = 58.8922, GNorm = 1.3726, lr_0 = 8.0468e-04
Loss = 1.8251e-01, PNorm = 58.9110, GNorm = 2.4002, lr_0 = 8.0413e-04
Loss = 1.6101e-01, PNorm = 58.9373, GNorm = 1.2134, lr_0 = 8.0358e-04
Loss = 1.6624e-01, PNorm = 58.9734, GNorm = 1.5328, lr_0 = 8.0303e-04
Loss = 1.9353e-01, PNorm = 59.0051, GNorm = 0.8267, lr_0 = 8.0248e-04
Loss = 1.7358e-01, PNorm = 59.0270, GNorm = 0.7084, lr_0 = 8.0193e-04
Loss = 1.9645e-01, PNorm = 59.0587, GNorm = 1.6889, lr_0 = 8.0138e-04
Loss = 1.9789e-01, PNorm = 59.0909, GNorm = 1.1688, lr_0 = 8.0083e-04
Loss = 1.6207e-01, PNorm = 59.1172, GNorm = 0.5590, lr_0 = 8.0028e-04
Loss = 1.8808e-01, PNorm = 59.1344, GNorm = 0.7849, lr_0 = 7.9974e-04
Loss = 1.8086e-01, PNorm = 59.1627, GNorm = 1.2817, lr_0 = 7.9919e-04
Loss = 2.0563e-01, PNorm = 59.1893, GNorm = 2.1026, lr_0 = 7.9864e-04
Loss = 1.9421e-01, PNorm = 59.2172, GNorm = 0.8098, lr_0 = 7.9809e-04
Loss = 2.0213e-01, PNorm = 59.2457, GNorm = 2.6741, lr_0 = 7.9755e-04
Loss = 2.1210e-01, PNorm = 59.2688, GNorm = 1.5940, lr_0 = 7.9700e-04
Loss = 2.0611e-01, PNorm = 59.2958, GNorm = 0.6585, lr_0 = 7.9645e-04
Loss = 1.7971e-01, PNorm = 59.3212, GNorm = 0.6311, lr_0 = 7.9591e-04
Loss = 2.0469e-01, PNorm = 59.3456, GNorm = 0.8460, lr_0 = 7.9536e-04
Loss = 1.6719e-01, PNorm = 59.3763, GNorm = 0.7216, lr_0 = 7.9482e-04
Loss = 1.8978e-01, PNorm = 59.3934, GNorm = 1.9865, lr_0 = 7.9427e-04
Loss = 1.9047e-01, PNorm = 59.4167, GNorm = 1.6896, lr_0 = 7.9373e-04
Loss = 2.1247e-01, PNorm = 59.4387, GNorm = 0.6407, lr_0 = 7.9319e-04
Loss = 1.5313e-01, PNorm = 59.4605, GNorm = 0.5675, lr_0 = 7.9264e-04
Loss = 1.9280e-01, PNorm = 59.4776, GNorm = 1.3907, lr_0 = 7.9210e-04
Loss = 1.6581e-01, PNorm = 59.5091, GNorm = 1.3859, lr_0 = 7.9156e-04
Loss = 1.8566e-01, PNorm = 59.5192, GNorm = 0.7573, lr_0 = 7.9101e-04
Loss = 1.8380e-01, PNorm = 59.5400, GNorm = 0.7928, lr_0 = 7.9047e-04
Loss = 1.9844e-01, PNorm = 59.5635, GNorm = 0.7980, lr_0 = 7.8993e-04
Loss = 1.9880e-01, PNorm = 59.5888, GNorm = 0.6541, lr_0 = 7.8939e-04
Loss = 1.8600e-01, PNorm = 59.6123, GNorm = 1.0932, lr_0 = 7.8885e-04
Loss = 1.7557e-01, PNorm = 59.6320, GNorm = 0.7904, lr_0 = 7.8831e-04
Loss = 2.0633e-01, PNorm = 59.6415, GNorm = 2.4079, lr_0 = 7.8777e-04
Loss = 1.8285e-01, PNorm = 59.6617, GNorm = 1.1840, lr_0 = 7.8723e-04
Loss = 2.1233e-01, PNorm = 59.6822, GNorm = 0.6948, lr_0 = 7.8669e-04
Loss = 2.1192e-01, PNorm = 59.7095, GNorm = 1.5613, lr_0 = 7.8615e-04
Loss = 1.6105e-01, PNorm = 59.7397, GNorm = 0.6942, lr_0 = 7.8561e-04
Loss = 1.7029e-01, PNorm = 59.7634, GNorm = 2.2312, lr_0 = 7.8507e-04
Loss = 2.2517e-01, PNorm = 59.7972, GNorm = 2.0832, lr_0 = 7.8454e-04
Loss = 1.6323e-01, PNorm = 59.8287, GNorm = 1.5334, lr_0 = 7.8400e-04
Loss = 1.9656e-01, PNorm = 59.8503, GNorm = 1.1925, lr_0 = 7.8346e-04
Loss = 1.9926e-01, PNorm = 59.8765, GNorm = 1.8858, lr_0 = 7.8293e-04
Loss = 1.9923e-01, PNorm = 59.9070, GNorm = 2.5584, lr_0 = 7.8239e-04
Loss = 1.9255e-01, PNorm = 59.9342, GNorm = 0.8297, lr_0 = 7.8185e-04
Loss = 1.7838e-01, PNorm = 59.9556, GNorm = 0.9795, lr_0 = 7.8132e-04
Validation mae = 0.259584
Epoch 5
Loss = 1.7332e-01, PNorm = 59.9805, GNorm = 0.7769, lr_0 = 7.8078e-04
Loss = 1.7263e-01, PNorm = 60.0033, GNorm = 1.2930, lr_0 = 7.8025e-04
Loss = 1.5894e-01, PNorm = 60.0220, GNorm = 1.2893, lr_0 = 7.7971e-04
Loss = 1.6812e-01, PNorm = 60.0473, GNorm = 0.8365, lr_0 = 7.7918e-04
Loss = 1.6293e-01, PNorm = 60.0734, GNorm = 0.6698, lr_0 = 7.7864e-04
Loss = 1.9185e-01, PNorm = 60.0961, GNorm = 1.7758, lr_0 = 7.7811e-04
Loss = 1.7922e-01, PNorm = 60.1272, GNorm = 1.9118, lr_0 = 7.7758e-04
Loss = 1.9163e-01, PNorm = 60.1484, GNorm = 1.1434, lr_0 = 7.7705e-04
Loss = 1.5976e-01, PNorm = 60.1754, GNorm = 1.7431, lr_0 = 7.7651e-04
Loss = 1.8062e-01, PNorm = 60.2059, GNorm = 0.9587, lr_0 = 7.7598e-04
Loss = 1.6713e-01, PNorm = 60.2340, GNorm = 0.8116, lr_0 = 7.7545e-04
Loss = 1.7485e-01, PNorm = 60.2549, GNorm = 1.8005, lr_0 = 7.7492e-04
Loss = 1.6049e-01, PNorm = 60.2767, GNorm = 0.6862, lr_0 = 7.7439e-04
Loss = 1.9581e-01, PNorm = 60.2955, GNorm = 1.1016, lr_0 = 7.7386e-04
Loss = 1.9468e-01, PNorm = 60.3319, GNorm = 0.8092, lr_0 = 7.7333e-04
Loss = 1.7754e-01, PNorm = 60.3533, GNorm = 0.8325, lr_0 = 7.7280e-04
Loss = 1.7530e-01, PNorm = 60.3740, GNorm = 1.0639, lr_0 = 7.7227e-04
Loss = 1.7107e-01, PNorm = 60.3975, GNorm = 0.7369, lr_0 = 7.7174e-04
Loss = 1.5403e-01, PNorm = 60.4215, GNorm = 0.6514, lr_0 = 7.7121e-04
Loss = 1.6089e-01, PNorm = 60.4399, GNorm = 0.9373, lr_0 = 7.7068e-04
Loss = 2.0534e-01, PNorm = 60.4636, GNorm = 1.2877, lr_0 = 7.7015e-04
Loss = 1.8357e-01, PNorm = 60.4917, GNorm = 0.5850, lr_0 = 7.6963e-04
Loss = 1.6550e-01, PNorm = 60.5187, GNorm = 0.6589, lr_0 = 7.6910e-04
Loss = 1.7935e-01, PNorm = 60.5415, GNorm = 1.2282, lr_0 = 7.6857e-04
Loss = 1.6921e-01, PNorm = 60.5693, GNorm = 0.9725, lr_0 = 7.6805e-04
Loss = 1.8569e-01, PNorm = 60.5936, GNorm = 1.2954, lr_0 = 7.6752e-04
Loss = 1.7306e-01, PNorm = 60.6203, GNorm = 0.7405, lr_0 = 7.6699e-04
Loss = 1.6814e-01, PNorm = 60.6502, GNorm = 2.4768, lr_0 = 7.6647e-04
Loss = 1.7934e-01, PNorm = 60.6812, GNorm = 0.7053, lr_0 = 7.6594e-04
Loss = 1.7708e-01, PNorm = 60.6966, GNorm = 1.0290, lr_0 = 7.6542e-04
Loss = 1.7863e-01, PNorm = 60.7175, GNorm = 1.1646, lr_0 = 7.6489e-04
Loss = 1.7077e-01, PNorm = 60.7431, GNorm = 0.7824, lr_0 = 7.6437e-04
Loss = 1.6311e-01, PNorm = 60.7650, GNorm = 1.1693, lr_0 = 7.6385e-04
Loss = 1.8852e-01, PNorm = 60.7943, GNorm = 0.7704, lr_0 = 7.6332e-04
Loss = 1.7366e-01, PNorm = 60.8230, GNorm = 0.9518, lr_0 = 7.6280e-04
Loss = 1.5452e-01, PNorm = 60.8484, GNorm = 0.6478, lr_0 = 7.6228e-04
Loss = 2.0425e-01, PNorm = 60.8725, GNorm = 0.6923, lr_0 = 7.6176e-04
Loss = 1.7341e-01, PNorm = 60.9030, GNorm = 0.7034, lr_0 = 7.6123e-04
Loss = 1.8003e-01, PNorm = 60.9263, GNorm = 0.7373, lr_0 = 7.6071e-04
Loss = 1.7525e-01, PNorm = 60.9492, GNorm = 0.8851, lr_0 = 7.6019e-04
Loss = 1.8981e-01, PNorm = 60.9803, GNorm = 1.4720, lr_0 = 7.5967e-04
Loss = 1.8558e-01, PNorm = 61.0107, GNorm = 1.2136, lr_0 = 7.5915e-04
Loss = 1.7343e-01, PNorm = 61.0280, GNorm = 0.6323, lr_0 = 7.5863e-04
Loss = 1.5412e-01, PNorm = 61.0605, GNorm = 1.4687, lr_0 = 7.5811e-04
Loss = 1.5793e-01, PNorm = 61.0768, GNorm = 0.9890, lr_0 = 7.5759e-04
Loss = 1.7713e-01, PNorm = 61.1008, GNorm = 1.1445, lr_0 = 7.5707e-04
Loss = 2.0415e-01, PNorm = 61.1223, GNorm = 0.7511, lr_0 = 7.5655e-04
Loss = 1.7029e-01, PNorm = 61.1497, GNorm = 0.6447, lr_0 = 7.5603e-04
Loss = 1.9218e-01, PNorm = 61.1800, GNorm = 0.7437, lr_0 = 7.5552e-04
Loss = 1.8316e-01, PNorm = 61.2031, GNorm = 0.6181, lr_0 = 7.5500e-04
Loss = 1.7643e-01, PNorm = 61.2300, GNorm = 1.8173, lr_0 = 7.5448e-04
Loss = 2.0089e-01, PNorm = 61.2533, GNorm = 0.8411, lr_0 = 7.5397e-04
Loss = 1.6441e-01, PNorm = 61.2822, GNorm = 1.0187, lr_0 = 7.5345e-04
Loss = 1.7433e-01, PNorm = 61.2991, GNorm = 1.0554, lr_0 = 7.5293e-04
Loss = 1.4651e-01, PNorm = 61.3228, GNorm = 1.1193, lr_0 = 7.5242e-04
Loss = 1.7251e-01, PNorm = 61.3333, GNorm = 0.8781, lr_0 = 7.5190e-04
Loss = 1.6874e-01, PNorm = 61.3552, GNorm = 1.5142, lr_0 = 7.5139e-04
Loss = 2.0188e-01, PNorm = 61.3663, GNorm = 1.1681, lr_0 = 7.5087e-04
Loss = 1.6589e-01, PNorm = 61.3856, GNorm = 0.8114, lr_0 = 7.5036e-04
Loss = 2.0076e-01, PNorm = 61.4039, GNorm = 0.8086, lr_0 = 7.4984e-04
Loss = 1.9097e-01, PNorm = 61.4363, GNorm = 1.6724, lr_0 = 7.4933e-04
Loss = 1.8923e-01, PNorm = 61.4564, GNorm = 1.4532, lr_0 = 7.4882e-04
Loss = 1.6954e-01, PNorm = 61.4791, GNorm = 1.3192, lr_0 = 7.4830e-04
Loss = 1.8890e-01, PNorm = 61.5109, GNorm = 1.1162, lr_0 = 7.4779e-04
Loss = 1.9689e-01, PNorm = 61.5368, GNorm = 2.5582, lr_0 = 7.4728e-04
Loss = 1.7643e-01, PNorm = 61.5631, GNorm = 0.7690, lr_0 = 7.4677e-04
Loss = 1.4886e-01, PNorm = 61.5851, GNorm = 0.8061, lr_0 = 7.4625e-04
Loss = 1.6538e-01, PNorm = 61.6065, GNorm = 0.8163, lr_0 = 7.4574e-04
Loss = 1.8070e-01, PNorm = 61.6367, GNorm = 0.5313, lr_0 = 7.4523e-04
Loss = 1.6483e-01, PNorm = 61.6620, GNorm = 1.2983, lr_0 = 7.4472e-04
Loss = 1.6399e-01, PNorm = 61.6837, GNorm = 0.9340, lr_0 = 7.4421e-04
Loss = 1.8540e-01, PNorm = 61.7043, GNorm = 0.5704, lr_0 = 7.4370e-04
Loss = 1.6356e-01, PNorm = 61.7292, GNorm = 1.5649, lr_0 = 7.4319e-04
Loss = 1.6115e-01, PNorm = 61.7508, GNorm = 1.1271, lr_0 = 7.4268e-04
Loss = 1.8295e-01, PNorm = 61.7778, GNorm = 1.5691, lr_0 = 7.4217e-04
Loss = 1.7417e-01, PNorm = 61.7966, GNorm = 0.8024, lr_0 = 7.4167e-04
Loss = 1.8442e-01, PNorm = 61.8268, GNorm = 1.9111, lr_0 = 7.4116e-04
Loss = 1.9083e-01, PNorm = 61.8483, GNorm = 0.7619, lr_0 = 7.4065e-04
Loss = 1.7878e-01, PNorm = 61.8719, GNorm = 1.9242, lr_0 = 7.4014e-04
Loss = 2.1284e-01, PNorm = 61.8900, GNorm = 1.0404, lr_0 = 7.3964e-04
Loss = 1.7392e-01, PNorm = 61.9182, GNorm = 1.0177, lr_0 = 7.3913e-04
Loss = 1.5719e-01, PNorm = 61.9427, GNorm = 0.9520, lr_0 = 7.3862e-04
Loss = 1.8145e-01, PNorm = 61.9672, GNorm = 1.3141, lr_0 = 7.3812e-04
Loss = 1.7895e-01, PNorm = 61.9950, GNorm = 1.5271, lr_0 = 7.3761e-04
Loss = 1.9187e-01, PNorm = 62.0149, GNorm = 0.9602, lr_0 = 7.3711e-04
Loss = 1.4081e-01, PNorm = 62.0358, GNorm = 1.7576, lr_0 = 7.3660e-04
Loss = 1.6000e-01, PNorm = 62.0580, GNorm = 1.1916, lr_0 = 7.3610e-04
Loss = 1.6420e-01, PNorm = 62.0835, GNorm = 0.4570, lr_0 = 7.3559e-04
Loss = 1.4781e-01, PNorm = 62.1029, GNorm = 0.6394, lr_0 = 7.3509e-04
Loss = 1.7756e-01, PNorm = 62.1181, GNorm = 0.9400, lr_0 = 7.3458e-04
Loss = 1.9044e-01, PNorm = 62.1354, GNorm = 1.5269, lr_0 = 7.3408e-04
Loss = 1.9397e-01, PNorm = 62.1511, GNorm = 0.8765, lr_0 = 7.3358e-04
Loss = 1.7444e-01, PNorm = 62.1862, GNorm = 1.0945, lr_0 = 7.3308e-04
Loss = 1.8742e-01, PNorm = 62.2095, GNorm = 1.2726, lr_0 = 7.3257e-04
Loss = 1.7638e-01, PNorm = 62.2349, GNorm = 1.7188, lr_0 = 7.3207e-04
Loss = 1.7158e-01, PNorm = 62.2508, GNorm = 0.8385, lr_0 = 7.3157e-04
Loss = 1.5688e-01, PNorm = 62.2673, GNorm = 0.4335, lr_0 = 7.3107e-04
Loss = 1.7442e-01, PNorm = 62.2869, GNorm = 0.9382, lr_0 = 7.3057e-04
Loss = 1.7472e-01, PNorm = 62.3139, GNorm = 1.2162, lr_0 = 7.3007e-04
Loss = 1.5354e-01, PNorm = 62.3326, GNorm = 0.5618, lr_0 = 7.2957e-04
Loss = 1.6924e-01, PNorm = 62.3495, GNorm = 1.2138, lr_0 = 7.2907e-04
Loss = 1.7306e-01, PNorm = 62.3674, GNorm = 0.7274, lr_0 = 7.2857e-04
Loss = 1.5512e-01, PNorm = 62.3873, GNorm = 1.1021, lr_0 = 7.2807e-04
Loss = 1.5785e-01, PNorm = 62.4057, GNorm = 0.6826, lr_0 = 7.2757e-04
Loss = 1.7880e-01, PNorm = 62.4227, GNorm = 0.7251, lr_0 = 7.2707e-04
Loss = 1.6375e-01, PNorm = 62.4435, GNorm = 1.1847, lr_0 = 7.2657e-04
Loss = 1.7724e-01, PNorm = 62.4707, GNorm = 1.0263, lr_0 = 7.2608e-04
Loss = 1.7447e-01, PNorm = 62.4917, GNorm = 0.6379, lr_0 = 7.2558e-04
Loss = 1.6405e-01, PNorm = 62.5190, GNorm = 0.9957, lr_0 = 7.2508e-04
Loss = 1.8504e-01, PNorm = 62.5429, GNorm = 0.6505, lr_0 = 7.2458e-04
Loss = 1.6546e-01, PNorm = 62.5611, GNorm = 1.1068, lr_0 = 7.2409e-04
Loss = 1.5911e-01, PNorm = 62.5807, GNorm = 0.7139, lr_0 = 7.2359e-04
Loss = 1.6217e-01, PNorm = 62.5977, GNorm = 0.6696, lr_0 = 7.2310e-04
Loss = 1.8722e-01, PNorm = 62.6172, GNorm = 1.7696, lr_0 = 7.2260e-04
Loss = 1.6755e-01, PNorm = 62.6360, GNorm = 0.7313, lr_0 = 7.2211e-04
Loss = 1.8254e-01, PNorm = 62.6509, GNorm = 0.9932, lr_0 = 7.2161e-04
Loss = 1.8502e-01, PNorm = 62.6728, GNorm = 0.8541, lr_0 = 7.2112e-04
Loss = 1.9894e-01, PNorm = 62.6966, GNorm = 1.0961, lr_0 = 7.2062e-04
Loss = 1.7957e-01, PNorm = 62.7171, GNorm = 0.5802, lr_0 = 7.2013e-04
Loss = 1.7170e-01, PNorm = 62.7368, GNorm = 1.0364, lr_0 = 7.1964e-04
Validation mae = 0.252189
Epoch 6
Loss = 1.6714e-01, PNorm = 62.7626, GNorm = 0.6927, lr_0 = 7.1914e-04
Loss = 1.6419e-01, PNorm = 62.7861, GNorm = 0.7037, lr_0 = 7.1865e-04
Loss = 1.4693e-01, PNorm = 62.8083, GNorm = 0.9946, lr_0 = 7.1816e-04
Loss = 1.6952e-01, PNorm = 62.8282, GNorm = 1.1826, lr_0 = 7.1767e-04
Loss = 1.8625e-01, PNorm = 62.8512, GNorm = 0.9674, lr_0 = 7.1717e-04
Loss = 1.5541e-01, PNorm = 62.8757, GNorm = 0.6672, lr_0 = 7.1668e-04
Loss = 1.6731e-01, PNorm = 62.9055, GNorm = 0.8307, lr_0 = 7.1619e-04
Loss = 1.6096e-01, PNorm = 62.9253, GNorm = 0.5973, lr_0 = 7.1570e-04
Loss = 1.6367e-01, PNorm = 62.9421, GNorm = 0.9216, lr_0 = 7.1521e-04
Loss = 1.8447e-01, PNorm = 62.9680, GNorm = 0.6747, lr_0 = 7.1472e-04
Loss = 1.7238e-01, PNorm = 62.9831, GNorm = 1.2146, lr_0 = 7.1423e-04
Loss = 1.6655e-01, PNorm = 63.0004, GNorm = 0.8420, lr_0 = 7.1374e-04
Loss = 1.6476e-01, PNorm = 63.0204, GNorm = 1.2346, lr_0 = 7.1325e-04
Loss = 1.6237e-01, PNorm = 63.0497, GNorm = 0.8289, lr_0 = 7.1277e-04
Loss = 1.4389e-01, PNorm = 63.0766, GNorm = 0.6102, lr_0 = 7.1228e-04
Loss = 1.4907e-01, PNorm = 63.1081, GNorm = 0.6719, lr_0 = 7.1179e-04
Loss = 1.4482e-01, PNorm = 63.1356, GNorm = 1.4498, lr_0 = 7.1130e-04
Loss = 1.6494e-01, PNorm = 63.1502, GNorm = 0.7148, lr_0 = 7.1081e-04
Loss = 1.6730e-01, PNorm = 63.1662, GNorm = 0.9250, lr_0 = 7.1033e-04
Loss = 1.4747e-01, PNorm = 63.1825, GNorm = 0.8286, lr_0 = 7.0984e-04
Loss = 1.7769e-01, PNorm = 63.2091, GNorm = 1.3053, lr_0 = 7.0935e-04
Loss = 1.5524e-01, PNorm = 63.2325, GNorm = 1.7083, lr_0 = 7.0887e-04
Loss = 1.8084e-01, PNorm = 63.2620, GNorm = 1.1627, lr_0 = 7.0838e-04
Loss = 1.5346e-01, PNorm = 63.2807, GNorm = 1.2300, lr_0 = 7.0790e-04
Loss = 1.6831e-01, PNorm = 63.3049, GNorm = 1.1417, lr_0 = 7.0741e-04
Loss = 1.5751e-01, PNorm = 63.3273, GNorm = 0.8612, lr_0 = 7.0693e-04
Loss = 1.7762e-01, PNorm = 63.3444, GNorm = 1.1085, lr_0 = 7.0644e-04
Loss = 1.4968e-01, PNorm = 63.3697, GNorm = 1.0699, lr_0 = 7.0596e-04
Loss = 1.7575e-01, PNorm = 63.3875, GNorm = 1.3541, lr_0 = 7.0548e-04
Loss = 1.5569e-01, PNorm = 63.4095, GNorm = 0.8579, lr_0 = 7.0499e-04
Loss = 1.6363e-01, PNorm = 63.4343, GNorm = 0.9240, lr_0 = 7.0451e-04
Loss = 1.4569e-01, PNorm = 63.4483, GNorm = 0.8370, lr_0 = 7.0403e-04
Loss = 1.5845e-01, PNorm = 63.4653, GNorm = 0.7983, lr_0 = 7.0354e-04
Loss = 1.8336e-01, PNorm = 63.4797, GNorm = 0.9317, lr_0 = 7.0306e-04
Loss = 1.5516e-01, PNorm = 63.4983, GNorm = 1.1667, lr_0 = 7.0258e-04
Loss = 1.6271e-01, PNorm = 63.5240, GNorm = 0.6666, lr_0 = 7.0210e-04
Loss = 1.7901e-01, PNorm = 63.5535, GNorm = 0.7920, lr_0 = 7.0162e-04
Loss = 1.6390e-01, PNorm = 63.5696, GNorm = 0.5556, lr_0 = 7.0114e-04
Loss = 1.8380e-01, PNorm = 63.5951, GNorm = 0.8597, lr_0 = 7.0066e-04
Loss = 1.5965e-01, PNorm = 63.6086, GNorm = 0.9766, lr_0 = 7.0018e-04
Loss = 1.6994e-01, PNorm = 63.6209, GNorm = 0.9042, lr_0 = 6.9970e-04
Loss = 1.6280e-01, PNorm = 63.6461, GNorm = 0.6609, lr_0 = 6.9922e-04
Loss = 1.5713e-01, PNorm = 63.6694, GNorm = 0.9413, lr_0 = 6.9874e-04
Loss = 1.7744e-01, PNorm = 63.6968, GNorm = 0.7034, lr_0 = 6.9826e-04
Loss = 1.6783e-01, PNorm = 63.7289, GNorm = 1.7000, lr_0 = 6.9778e-04
Loss = 1.6950e-01, PNorm = 63.7492, GNorm = 0.7283, lr_0 = 6.9730e-04
Loss = 1.5717e-01, PNorm = 63.7748, GNorm = 1.4024, lr_0 = 6.9683e-04
Loss = 1.6911e-01, PNorm = 63.8010, GNorm = 0.6962, lr_0 = 6.9635e-04
Loss = 1.6626e-01, PNorm = 63.8253, GNorm = 0.6669, lr_0 = 6.9587e-04
Loss = 1.5471e-01, PNorm = 63.8483, GNorm = 0.7405, lr_0 = 6.9540e-04
Loss = 1.5056e-01, PNorm = 63.8606, GNorm = 1.5133, lr_0 = 6.9492e-04
Loss = 1.4992e-01, PNorm = 63.8774, GNorm = 0.9050, lr_0 = 6.9444e-04
Loss = 1.6789e-01, PNorm = 63.8971, GNorm = 1.0581, lr_0 = 6.9397e-04
Loss = 1.5632e-01, PNorm = 63.9141, GNorm = 0.7113, lr_0 = 6.9349e-04
Loss = 1.8593e-01, PNorm = 63.9391, GNorm = 0.7366, lr_0 = 6.9302e-04
Loss = 1.8182e-01, PNorm = 63.9611, GNorm = 0.5945, lr_0 = 6.9254e-04
Loss = 1.6313e-01, PNorm = 63.9797, GNorm = 0.9426, lr_0 = 6.9207e-04
Loss = 1.5428e-01, PNorm = 64.0040, GNorm = 0.6925, lr_0 = 6.9159e-04
Loss = 1.7696e-01, PNorm = 64.0269, GNorm = 1.5287, lr_0 = 6.9112e-04
Loss = 1.6402e-01, PNorm = 64.0583, GNorm = 0.7633, lr_0 = 6.9065e-04
Loss = 1.6874e-01, PNorm = 64.0807, GNorm = 0.6372, lr_0 = 6.9017e-04
Loss = 1.7474e-01, PNorm = 64.1081, GNorm = 1.3781, lr_0 = 6.8970e-04
Loss = 1.6467e-01, PNorm = 64.1259, GNorm = 0.6058, lr_0 = 6.8923e-04
Loss = 1.4665e-01, PNorm = 64.1419, GNorm = 0.5608, lr_0 = 6.8876e-04
Loss = 1.6034e-01, PNorm = 64.1621, GNorm = 0.6162, lr_0 = 6.8828e-04
Loss = 1.6154e-01, PNorm = 64.1832, GNorm = 0.7955, lr_0 = 6.8781e-04
Loss = 1.6524e-01, PNorm = 64.1961, GNorm = 1.0506, lr_0 = 6.8734e-04
Loss = 1.6931e-01, PNorm = 64.2245, GNorm = 1.3466, lr_0 = 6.8687e-04
Loss = 1.4704e-01, PNorm = 64.2479, GNorm = 0.6342, lr_0 = 6.8640e-04
Loss = 1.7094e-01, PNorm = 64.2743, GNorm = 0.6819, lr_0 = 6.8593e-04
Loss = 1.8265e-01, PNorm = 64.2969, GNorm = 0.6249, lr_0 = 6.8546e-04
Loss = 1.8367e-01, PNorm = 64.3221, GNorm = 1.3223, lr_0 = 6.8499e-04
Loss = 1.6648e-01, PNorm = 64.3303, GNorm = 0.8019, lr_0 = 6.8452e-04
Loss = 1.5623e-01, PNorm = 64.3554, GNorm = 1.8344, lr_0 = 6.8405e-04
Loss = 1.6637e-01, PNorm = 64.3771, GNorm = 0.6436, lr_0 = 6.8358e-04
Loss = 1.6160e-01, PNorm = 64.4027, GNorm = 1.2163, lr_0 = 6.8312e-04
Loss = 1.5078e-01, PNorm = 64.4183, GNorm = 1.1080, lr_0 = 6.8265e-04
Loss = 1.8874e-01, PNorm = 64.4408, GNorm = 1.1264, lr_0 = 6.8218e-04
Loss = 1.6766e-01, PNorm = 64.4654, GNorm = 0.7353, lr_0 = 6.8171e-04
Loss = 1.6828e-01, PNorm = 64.4860, GNorm = 0.7051, lr_0 = 6.8125e-04
Loss = 1.6749e-01, PNorm = 64.5050, GNorm = 1.1343, lr_0 = 6.8078e-04
Loss = 1.8572e-01, PNorm = 64.5270, GNorm = 1.0340, lr_0 = 6.8031e-04
Loss = 1.6962e-01, PNorm = 64.5480, GNorm = 0.7703, lr_0 = 6.7985e-04
Loss = 1.6278e-01, PNorm = 64.5656, GNorm = 0.7505, lr_0 = 6.7938e-04
Loss = 1.5331e-01, PNorm = 64.5891, GNorm = 0.7299, lr_0 = 6.7892e-04
Loss = 1.5575e-01, PNorm = 64.6099, GNorm = 0.6816, lr_0 = 6.7845e-04
Loss = 1.5874e-01, PNorm = 64.6341, GNorm = 0.6285, lr_0 = 6.7799e-04
Loss = 1.5918e-01, PNorm = 64.6553, GNorm = 0.9187, lr_0 = 6.7752e-04
Loss = 1.4588e-01, PNorm = 64.6752, GNorm = 0.6502, lr_0 = 6.7706e-04
Loss = 1.7481e-01, PNorm = 64.6988, GNorm = 1.2975, lr_0 = 6.7659e-04
Loss = 2.0534e-01, PNorm = 64.7203, GNorm = 2.8434, lr_0 = 6.7613e-04
Loss = 1.7276e-01, PNorm = 64.7469, GNorm = 2.4478, lr_0 = 6.7567e-04
Loss = 1.6581e-01, PNorm = 64.7798, GNorm = 1.0860, lr_0 = 6.7520e-04
Loss = 1.8050e-01, PNorm = 64.8153, GNorm = 0.8599, lr_0 = 6.7474e-04
Loss = 1.4912e-01, PNorm = 64.8410, GNorm = 0.5556, lr_0 = 6.7428e-04
Loss = 1.5963e-01, PNorm = 64.8647, GNorm = 0.6033, lr_0 = 6.7382e-04
Loss = 1.6408e-01, PNorm = 64.8847, GNorm = 1.5768, lr_0 = 6.7335e-04
Loss = 1.5969e-01, PNorm = 64.8996, GNorm = 1.1593, lr_0 = 6.7289e-04
Loss = 1.7903e-01, PNorm = 64.9191, GNorm = 1.3162, lr_0 = 6.7243e-04
Loss = 1.6343e-01, PNorm = 64.9422, GNorm = 1.3687, lr_0 = 6.7197e-04
Loss = 1.7127e-01, PNorm = 64.9619, GNorm = 0.7003, lr_0 = 6.7151e-04
Loss = 1.6697e-01, PNorm = 64.9840, GNorm = 0.9424, lr_0 = 6.7105e-04
Loss = 1.6928e-01, PNorm = 65.0015, GNorm = 0.5683, lr_0 = 6.7059e-04
Loss = 1.6838e-01, PNorm = 65.0218, GNorm = 0.8286, lr_0 = 6.7013e-04
Loss = 1.8929e-01, PNorm = 65.0412, GNorm = 0.9874, lr_0 = 6.6967e-04
Loss = 1.4989e-01, PNorm = 65.0583, GNorm = 1.0524, lr_0 = 6.6921e-04
Loss = 1.6288e-01, PNorm = 65.0764, GNorm = 0.6922, lr_0 = 6.6876e-04
Loss = 1.6526e-01, PNorm = 65.0977, GNorm = 0.6596, lr_0 = 6.6830e-04
Loss = 1.5219e-01, PNorm = 65.1185, GNorm = 0.4994, lr_0 = 6.6784e-04
Loss = 1.7143e-01, PNorm = 65.1387, GNorm = 0.6753, lr_0 = 6.6738e-04
Loss = 1.9603e-01, PNorm = 65.1626, GNorm = 1.0862, lr_0 = 6.6693e-04
Loss = 1.6903e-01, PNorm = 65.1838, GNorm = 0.7291, lr_0 = 6.6647e-04
Loss = 1.7032e-01, PNorm = 65.2123, GNorm = 0.6975, lr_0 = 6.6601e-04
Loss = 1.7475e-01, PNorm = 65.2415, GNorm = 0.6825, lr_0 = 6.6556e-04
Loss = 1.7756e-01, PNorm = 65.2550, GNorm = 0.8250, lr_0 = 6.6510e-04
Loss = 1.8825e-01, PNorm = 65.2695, GNorm = 0.6216, lr_0 = 6.6464e-04
Loss = 1.7083e-01, PNorm = 65.2975, GNorm = 0.7269, lr_0 = 6.6419e-04
Loss = 1.6740e-01, PNorm = 65.3195, GNorm = 1.1568, lr_0 = 6.6373e-04
Loss = 1.5693e-01, PNorm = 65.3426, GNorm = 0.5869, lr_0 = 6.6328e-04
Loss = 1.5576e-01, PNorm = 65.3642, GNorm = 0.8822, lr_0 = 6.6282e-04
Validation mae = 0.244487
Epoch 7
Loss = 1.5659e-01, PNorm = 65.3893, GNorm = 1.1525, lr_0 = 6.6237e-04
Loss = 1.7706e-01, PNorm = 65.4143, GNorm = 1.3746, lr_0 = 6.6192e-04
Loss = 1.4713e-01, PNorm = 65.4411, GNorm = 0.9995, lr_0 = 6.6146e-04
Loss = 1.5809e-01, PNorm = 65.4667, GNorm = 0.9274, lr_0 = 6.6101e-04
Loss = 1.5901e-01, PNorm = 65.4903, GNorm = 1.3476, lr_0 = 6.6056e-04
Loss = 1.5421e-01, PNorm = 65.5211, GNorm = 0.8931, lr_0 = 6.6011e-04
Loss = 1.5148e-01, PNorm = 65.5422, GNorm = 1.3530, lr_0 = 6.5965e-04
Loss = 1.4449e-01, PNorm = 65.5626, GNorm = 0.9496, lr_0 = 6.5920e-04
Loss = 1.7165e-01, PNorm = 65.5808, GNorm = 0.8337, lr_0 = 6.5875e-04
Loss = 1.5489e-01, PNorm = 65.5973, GNorm = 1.3136, lr_0 = 6.5830e-04
Loss = 1.6746e-01, PNorm = 65.6219, GNorm = 1.4715, lr_0 = 6.5785e-04
Loss = 1.7907e-01, PNorm = 65.6437, GNorm = 1.3325, lr_0 = 6.5740e-04
Loss = 1.7127e-01, PNorm = 65.6614, GNorm = 1.5017, lr_0 = 6.5695e-04
Loss = 1.4571e-01, PNorm = 65.6905, GNorm = 0.6258, lr_0 = 6.5650e-04
Loss = 1.4978e-01, PNorm = 65.7063, GNorm = 0.7125, lr_0 = 6.5605e-04
Loss = 1.4342e-01, PNorm = 65.7285, GNorm = 0.6874, lr_0 = 6.5560e-04
Loss = 1.3628e-01, PNorm = 65.7491, GNorm = 1.0624, lr_0 = 6.5515e-04
Loss = 1.5896e-01, PNorm = 65.7677, GNorm = 0.8387, lr_0 = 6.5470e-04
Loss = 1.3963e-01, PNorm = 65.7835, GNorm = 0.7346, lr_0 = 6.5425e-04
Loss = 1.4175e-01, PNorm = 65.8066, GNorm = 1.4702, lr_0 = 6.5380e-04
Loss = 1.7711e-01, PNorm = 65.8286, GNorm = 0.6603, lr_0 = 6.5335e-04
Loss = 1.5576e-01, PNorm = 65.8488, GNorm = 0.6155, lr_0 = 6.5291e-04
Loss = 1.5306e-01, PNorm = 65.8742, GNorm = 0.5519, lr_0 = 6.5246e-04
Loss = 1.6044e-01, PNorm = 65.8939, GNorm = 1.2536, lr_0 = 6.5201e-04
Loss = 1.5127e-01, PNorm = 65.9189, GNorm = 1.3161, lr_0 = 6.5157e-04
Loss = 1.7307e-01, PNorm = 65.9359, GNorm = 1.6835, lr_0 = 6.5112e-04
Loss = 1.6159e-01, PNorm = 65.9586, GNorm = 1.0041, lr_0 = 6.5067e-04
Loss = 1.6651e-01, PNorm = 65.9866, GNorm = 0.8650, lr_0 = 6.5023e-04
Loss = 1.7365e-01, PNorm = 66.0157, GNorm = 1.1786, lr_0 = 6.4978e-04
Loss = 1.5660e-01, PNorm = 66.0371, GNorm = 0.9816, lr_0 = 6.4934e-04
Loss = 1.4670e-01, PNorm = 66.0564, GNorm = 1.5651, lr_0 = 6.4889e-04
Loss = 1.6278e-01, PNorm = 66.0782, GNorm = 1.1811, lr_0 = 6.4845e-04
Loss = 1.4396e-01, PNorm = 66.0997, GNorm = 0.6401, lr_0 = 6.4800e-04
Loss = 1.4131e-01, PNorm = 66.1163, GNorm = 0.9819, lr_0 = 6.4756e-04
Loss = 1.4576e-01, PNorm = 66.1355, GNorm = 0.6192, lr_0 = 6.4712e-04
Loss = 1.5750e-01, PNorm = 66.1533, GNorm = 1.3026, lr_0 = 6.4667e-04
Loss = 1.5319e-01, PNorm = 66.1675, GNorm = 0.8826, lr_0 = 6.4623e-04
Loss = 1.4321e-01, PNorm = 66.1891, GNorm = 1.1676, lr_0 = 6.4579e-04
Loss = 1.7234e-01, PNorm = 66.2086, GNorm = 0.5366, lr_0 = 6.4534e-04
Loss = 1.5979e-01, PNorm = 66.2271, GNorm = 0.6275, lr_0 = 6.4490e-04
Loss = 1.5624e-01, PNorm = 66.2514, GNorm = 0.7143, lr_0 = 6.4446e-04
Loss = 1.8073e-01, PNorm = 66.2741, GNorm = 0.8867, lr_0 = 6.4402e-04
Loss = 1.6325e-01, PNorm = 66.2950, GNorm = 0.9300, lr_0 = 6.4358e-04
Loss = 1.6561e-01, PNorm = 66.3145, GNorm = 0.8026, lr_0 = 6.4314e-04
Loss = 1.4208e-01, PNorm = 66.3280, GNorm = 0.8097, lr_0 = 6.4270e-04
Loss = 1.6788e-01, PNorm = 66.3473, GNorm = 0.6016, lr_0 = 6.4226e-04
Loss = 1.5474e-01, PNorm = 66.3627, GNorm = 0.6357, lr_0 = 6.4182e-04
Loss = 1.5336e-01, PNorm = 66.3807, GNorm = 1.5779, lr_0 = 6.4138e-04
Loss = 1.7427e-01, PNorm = 66.3998, GNorm = 0.6566, lr_0 = 6.4094e-04
Loss = 1.6851e-01, PNorm = 66.4200, GNorm = 0.6094, lr_0 = 6.4050e-04
Loss = 1.4422e-01, PNorm = 66.4419, GNorm = 0.6639, lr_0 = 6.4006e-04
Loss = 1.7549e-01, PNorm = 66.4572, GNorm = 0.5619, lr_0 = 6.3962e-04
Loss = 1.6056e-01, PNorm = 66.4719, GNorm = 0.8138, lr_0 = 6.3918e-04
Loss = 1.6439e-01, PNorm = 66.4895, GNorm = 0.8882, lr_0 = 6.3874e-04
Loss = 1.5275e-01, PNorm = 66.5149, GNorm = 1.7385, lr_0 = 6.3831e-04
Loss = 1.3812e-01, PNorm = 66.5303, GNorm = 0.7367, lr_0 = 6.3787e-04
Loss = 1.4407e-01, PNorm = 66.5523, GNorm = 0.6351, lr_0 = 6.3743e-04
Loss = 1.5542e-01, PNorm = 66.5725, GNorm = 0.6642, lr_0 = 6.3700e-04
Loss = 1.6125e-01, PNorm = 66.5910, GNorm = 0.6938, lr_0 = 6.3656e-04
Loss = 1.5640e-01, PNorm = 66.6059, GNorm = 0.7715, lr_0 = 6.3612e-04
Loss = 1.5985e-01, PNorm = 66.6166, GNorm = 1.0079, lr_0 = 6.3569e-04
Loss = 1.3211e-01, PNorm = 66.6370, GNorm = 1.5366, lr_0 = 6.3525e-04
Loss = 1.4963e-01, PNorm = 66.6544, GNorm = 0.5344, lr_0 = 6.3482e-04
Loss = 1.5211e-01, PNorm = 66.6748, GNorm = 1.9828, lr_0 = 6.3438e-04
Loss = 1.5188e-01, PNorm = 66.6918, GNorm = 0.8661, lr_0 = 6.3395e-04
Loss = 1.5148e-01, PNorm = 66.7136, GNorm = 1.0923, lr_0 = 6.3351e-04
Loss = 1.5823e-01, PNorm = 66.7343, GNorm = 0.8151, lr_0 = 6.3308e-04
Loss = 1.4930e-01, PNorm = 66.7572, GNorm = 0.7295, lr_0 = 6.3265e-04
Loss = 1.5003e-01, PNorm = 66.7743, GNorm = 1.1398, lr_0 = 6.3221e-04
Loss = 1.8144e-01, PNorm = 66.8021, GNorm = 1.7747, lr_0 = 6.3178e-04
Loss = 1.5797e-01, PNorm = 66.8285, GNorm = 0.9950, lr_0 = 6.3135e-04
Loss = 1.5395e-01, PNorm = 66.8510, GNorm = 0.7821, lr_0 = 6.3091e-04
Loss = 1.8186e-01, PNorm = 66.8705, GNorm = 0.6626, lr_0 = 6.3048e-04
Loss = 1.4382e-01, PNorm = 66.8894, GNorm = 0.6618, lr_0 = 6.3005e-04
Loss = 1.5953e-01, PNorm = 66.9067, GNorm = 0.9198, lr_0 = 6.2962e-04
Loss = 1.8037e-01, PNorm = 66.9270, GNorm = 0.7961, lr_0 = 6.2919e-04
Loss = 1.5373e-01, PNorm = 66.9527, GNorm = 1.1608, lr_0 = 6.2876e-04
Loss = 1.8343e-01, PNorm = 66.9820, GNorm = 0.6598, lr_0 = 6.2833e-04
Loss = 1.6418e-01, PNorm = 67.0020, GNorm = 2.6520, lr_0 = 6.2789e-04
Loss = 1.7704e-01, PNorm = 67.0221, GNorm = 0.9348, lr_0 = 6.2746e-04
Loss = 1.5302e-01, PNorm = 67.0469, GNorm = 1.3848, lr_0 = 6.2703e-04
Loss = 1.7190e-01, PNorm = 67.0691, GNorm = 1.0573, lr_0 = 6.2661e-04
Loss = 1.3698e-01, PNorm = 67.0978, GNorm = 0.6694, lr_0 = 6.2618e-04
Loss = 1.5033e-01, PNorm = 67.1169, GNorm = 0.6533, lr_0 = 6.2575e-04
Loss = 1.5814e-01, PNorm = 67.1346, GNorm = 0.6774, lr_0 = 6.2532e-04
Loss = 1.5216e-01, PNorm = 67.1538, GNorm = 1.0240, lr_0 = 6.2489e-04
Loss = 1.6065e-01, PNorm = 67.1725, GNorm = 0.7999, lr_0 = 6.2446e-04
Loss = 1.6173e-01, PNorm = 67.1885, GNorm = 0.7520, lr_0 = 6.2403e-04
Loss = 1.7486e-01, PNorm = 67.2085, GNorm = 1.1476, lr_0 = 6.2361e-04
Loss = 1.7676e-01, PNorm = 67.2312, GNorm = 0.7336, lr_0 = 6.2318e-04
Loss = 1.4374e-01, PNorm = 67.2584, GNorm = 1.2851, lr_0 = 6.2275e-04
Loss = 1.5687e-01, PNorm = 67.2768, GNorm = 0.8077, lr_0 = 6.2233e-04
Loss = 1.7564e-01, PNorm = 67.2960, GNorm = 1.3049, lr_0 = 6.2190e-04
Loss = 1.3566e-01, PNorm = 67.3098, GNorm = 0.8174, lr_0 = 6.2147e-04
Loss = 1.5450e-01, PNorm = 67.3259, GNorm = 0.8158, lr_0 = 6.2105e-04
Loss = 1.6602e-01, PNorm = 67.3439, GNorm = 0.8674, lr_0 = 6.2062e-04
Loss = 1.4291e-01, PNorm = 67.3597, GNorm = 0.6587, lr_0 = 6.2020e-04
Loss = 1.7902e-01, PNorm = 67.3769, GNorm = 0.6516, lr_0 = 6.1977e-04
Loss = 1.6904e-01, PNorm = 67.3998, GNorm = 0.9051, lr_0 = 6.1935e-04
Loss = 1.5344e-01, PNorm = 67.4228, GNorm = 0.7469, lr_0 = 6.1892e-04
Loss = 1.6215e-01, PNorm = 67.4499, GNorm = 1.1837, lr_0 = 6.1850e-04
Loss = 1.4728e-01, PNorm = 67.4785, GNorm = 0.7472, lr_0 = 6.1808e-04
Loss = 1.6098e-01, PNorm = 67.5010, GNorm = 0.8710, lr_0 = 6.1765e-04
Loss = 1.3719e-01, PNorm = 67.5264, GNorm = 0.6625, lr_0 = 6.1723e-04
Loss = 1.6002e-01, PNorm = 67.5475, GNorm = 0.7780, lr_0 = 6.1681e-04
Loss = 1.6924e-01, PNorm = 67.5712, GNorm = 1.6733, lr_0 = 6.1638e-04
Loss = 1.6204e-01, PNorm = 67.5955, GNorm = 0.8633, lr_0 = 6.1596e-04
Loss = 1.5984e-01, PNorm = 67.6173, GNorm = 1.2374, lr_0 = 6.1554e-04
Loss = 1.4530e-01, PNorm = 67.6338, GNorm = 0.7398, lr_0 = 6.1512e-04
Loss = 1.5085e-01, PNorm = 67.6529, GNorm = 1.0587, lr_0 = 6.1470e-04
Loss = 1.6068e-01, PNorm = 67.6696, GNorm = 0.6507, lr_0 = 6.1428e-04
Loss = 1.5847e-01, PNorm = 67.6908, GNorm = 1.1655, lr_0 = 6.1385e-04
Loss = 1.5068e-01, PNorm = 67.7066, GNorm = 0.6054, lr_0 = 6.1343e-04
Loss = 1.7970e-01, PNorm = 67.7191, GNorm = 1.0011, lr_0 = 6.1301e-04
Loss = 1.6295e-01, PNorm = 67.7394, GNorm = 1.2736, lr_0 = 6.1259e-04
Loss = 1.4184e-01, PNorm = 67.7637, GNorm = 0.7740, lr_0 = 6.1217e-04
Loss = 1.4897e-01, PNorm = 67.7861, GNorm = 0.8544, lr_0 = 6.1175e-04
Loss = 1.5717e-01, PNorm = 67.8057, GNorm = 0.8826, lr_0 = 6.1134e-04
Loss = 1.6358e-01, PNorm = 67.8188, GNorm = 0.8379, lr_0 = 6.1092e-04
Loss = 1.5349e-01, PNorm = 67.8334, GNorm = 1.1629, lr_0 = 6.1050e-04
Validation mae = 0.264020
Epoch 8
Loss = 1.6711e-01, PNorm = 67.8571, GNorm = 0.8866, lr_0 = 6.1008e-04
Loss = 1.5125e-01, PNorm = 67.8779, GNorm = 1.2175, lr_0 = 6.0966e-04
Loss = 1.3281e-01, PNorm = 67.8906, GNorm = 1.3965, lr_0 = 6.0924e-04
Loss = 1.4862e-01, PNorm = 67.9042, GNorm = 0.9536, lr_0 = 6.0883e-04
Loss = 1.5055e-01, PNorm = 67.9274, GNorm = 1.7257, lr_0 = 6.0841e-04
Loss = 1.4590e-01, PNorm = 67.9508, GNorm = 1.2039, lr_0 = 6.0799e-04
Loss = 1.7596e-01, PNorm = 67.9718, GNorm = 1.1105, lr_0 = 6.0758e-04
Loss = 1.9089e-01, PNorm = 68.0042, GNorm = 1.1571, lr_0 = 6.0716e-04
Loss = 1.4184e-01, PNorm = 68.0383, GNorm = 0.6398, lr_0 = 6.0674e-04
Loss = 1.5607e-01, PNorm = 68.0649, GNorm = 1.3851, lr_0 = 6.0633e-04
Loss = 1.6347e-01, PNorm = 68.0875, GNorm = 0.9046, lr_0 = 6.0591e-04
Loss = 1.4189e-01, PNorm = 68.1112, GNorm = 0.6327, lr_0 = 6.0550e-04
Loss = 1.3699e-01, PNorm = 68.1353, GNorm = 0.4951, lr_0 = 6.0508e-04
Loss = 1.5986e-01, PNorm = 68.1403, GNorm = 0.8310, lr_0 = 6.0467e-04
Loss = 1.2660e-01, PNorm = 68.1612, GNorm = 0.7616, lr_0 = 6.0425e-04
Loss = 1.4099e-01, PNorm = 68.1761, GNorm = 1.2712, lr_0 = 6.0384e-04
Loss = 1.2987e-01, PNorm = 68.1966, GNorm = 0.6384, lr_0 = 6.0343e-04
Loss = 1.6290e-01, PNorm = 68.2065, GNorm = 0.8245, lr_0 = 6.0301e-04
Loss = 1.4855e-01, PNorm = 68.2218, GNorm = 0.6579, lr_0 = 6.0260e-04
Loss = 1.4646e-01, PNorm = 68.2438, GNorm = 1.2221, lr_0 = 6.0219e-04
Loss = 1.5529e-01, PNorm = 68.2640, GNorm = 0.6645, lr_0 = 6.0178e-04
Loss = 1.5184e-01, PNorm = 68.2874, GNorm = 0.8497, lr_0 = 6.0136e-04
Loss = 1.4671e-01, PNorm = 68.3062, GNorm = 0.8271, lr_0 = 6.0095e-04
Loss = 1.4703e-01, PNorm = 68.3230, GNorm = 0.6199, lr_0 = 6.0054e-04
Loss = 1.4332e-01, PNorm = 68.3444, GNorm = 0.7176, lr_0 = 6.0013e-04
Loss = 1.4893e-01, PNorm = 68.3616, GNorm = 0.7938, lr_0 = 5.9972e-04
Loss = 1.8666e-01, PNorm = 68.3796, GNorm = 0.9370, lr_0 = 5.9931e-04
Loss = 1.6584e-01, PNorm = 68.4024, GNorm = 1.1935, lr_0 = 5.9890e-04
Loss = 1.6390e-01, PNorm = 68.4296, GNorm = 0.9030, lr_0 = 5.9849e-04
Loss = 1.6744e-01, PNorm = 68.4515, GNorm = 0.5699, lr_0 = 5.9808e-04
Loss = 1.2918e-01, PNorm = 68.4729, GNorm = 0.6475, lr_0 = 5.9767e-04
Loss = 1.5848e-01, PNorm = 68.4909, GNorm = 1.0885, lr_0 = 5.9726e-04
Loss = 1.3079e-01, PNorm = 68.5118, GNorm = 0.5111, lr_0 = 5.9685e-04
Loss = 1.4459e-01, PNorm = 68.5301, GNorm = 0.7411, lr_0 = 5.9644e-04
Loss = 1.6006e-01, PNorm = 68.5432, GNorm = 0.6689, lr_0 = 5.9603e-04
Loss = 1.4172e-01, PNorm = 68.5674, GNorm = 1.3397, lr_0 = 5.9562e-04
Loss = 1.4614e-01, PNorm = 68.5813, GNorm = 0.5627, lr_0 = 5.9521e-04
Loss = 1.3868e-01, PNorm = 68.5992, GNorm = 0.6986, lr_0 = 5.9481e-04
Loss = 1.4453e-01, PNorm = 68.6179, GNorm = 0.8115, lr_0 = 5.9440e-04
Loss = 1.6452e-01, PNorm = 68.6400, GNorm = 0.9431, lr_0 = 5.9399e-04
Loss = 1.5460e-01, PNorm = 68.6513, GNorm = 1.1577, lr_0 = 5.9358e-04
Loss = 1.4424e-01, PNorm = 68.6745, GNorm = 1.2836, lr_0 = 5.9318e-04
Loss = 1.5273e-01, PNorm = 68.6919, GNorm = 0.7195, lr_0 = 5.9277e-04
Loss = 1.5289e-01, PNorm = 68.7077, GNorm = 1.0112, lr_0 = 5.9236e-04
Loss = 1.5524e-01, PNorm = 68.7289, GNorm = 0.7189, lr_0 = 5.9196e-04
Loss = 1.4465e-01, PNorm = 68.7543, GNorm = 0.7464, lr_0 = 5.9155e-04
Loss = 1.5447e-01, PNorm = 68.7718, GNorm = 1.3599, lr_0 = 5.9115e-04
Loss = 1.5073e-01, PNorm = 68.7904, GNorm = 1.1681, lr_0 = 5.9074e-04
Loss = 1.6439e-01, PNorm = 68.8108, GNorm = 0.5529, lr_0 = 5.9034e-04
Loss = 1.2861e-01, PNorm = 68.8318, GNorm = 0.7301, lr_0 = 5.8993e-04
Loss = 1.4605e-01, PNorm = 68.8529, GNorm = 0.9719, lr_0 = 5.8953e-04
Loss = 1.5011e-01, PNorm = 68.8659, GNorm = 0.9103, lr_0 = 5.8913e-04
Loss = 1.5241e-01, PNorm = 68.8834, GNorm = 0.6664, lr_0 = 5.8872e-04
Loss = 1.4696e-01, PNorm = 68.8958, GNorm = 0.5384, lr_0 = 5.8832e-04
Loss = 1.5774e-01, PNorm = 68.9175, GNorm = 0.6342, lr_0 = 5.8792e-04
Loss = 1.4897e-01, PNorm = 68.9347, GNorm = 0.7348, lr_0 = 5.8751e-04
Loss = 1.3296e-01, PNorm = 68.9514, GNorm = 0.6582, lr_0 = 5.8711e-04
Loss = 1.4974e-01, PNorm = 68.9653, GNorm = 0.9714, lr_0 = 5.8671e-04
Loss = 1.3526e-01, PNorm = 68.9806, GNorm = 1.1432, lr_0 = 5.8631e-04
Loss = 1.5921e-01, PNorm = 68.9976, GNorm = 0.8529, lr_0 = 5.8591e-04
Loss = 1.6343e-01, PNorm = 69.0152, GNorm = 0.8357, lr_0 = 5.8550e-04
Loss = 1.5180e-01, PNorm = 69.0346, GNorm = 1.0642, lr_0 = 5.8510e-04
Loss = 1.6992e-01, PNorm = 69.0673, GNorm = 0.6355, lr_0 = 5.8470e-04
Loss = 1.3196e-01, PNorm = 69.0905, GNorm = 0.7173, lr_0 = 5.8430e-04
Loss = 1.5772e-01, PNorm = 69.1049, GNorm = 0.8172, lr_0 = 5.8390e-04
Loss = 1.4709e-01, PNorm = 69.1243, GNorm = 0.6810, lr_0 = 5.8350e-04
Loss = 1.7247e-01, PNorm = 69.1438, GNorm = 0.7833, lr_0 = 5.8310e-04
Loss = 1.5924e-01, PNorm = 69.1607, GNorm = 1.0691, lr_0 = 5.8270e-04
Loss = 1.4336e-01, PNorm = 69.1700, GNorm = 0.7805, lr_0 = 5.8230e-04
Loss = 1.5161e-01, PNorm = 69.1850, GNorm = 1.2451, lr_0 = 5.8190e-04
Loss = 1.2832e-01, PNorm = 69.2038, GNorm = 0.7212, lr_0 = 5.8151e-04
Loss = 1.6392e-01, PNorm = 69.2193, GNorm = 0.6851, lr_0 = 5.8111e-04
Loss = 1.4243e-01, PNorm = 69.2340, GNorm = 0.9669, lr_0 = 5.8071e-04
Loss = 1.5757e-01, PNorm = 69.2441, GNorm = 0.6714, lr_0 = 5.8031e-04
Loss = 1.4639e-01, PNorm = 69.2627, GNorm = 0.6530, lr_0 = 5.7991e-04
Loss = 1.4191e-01, PNorm = 69.2824, GNorm = 0.7870, lr_0 = 5.7952e-04
Loss = 1.4987e-01, PNorm = 69.2994, GNorm = 1.0699, lr_0 = 5.7912e-04
Loss = 1.4385e-01, PNorm = 69.3158, GNorm = 0.6776, lr_0 = 5.7872e-04
Loss = 1.5321e-01, PNorm = 69.3379, GNorm = 1.8170, lr_0 = 5.7833e-04
Loss = 1.4739e-01, PNorm = 69.3504, GNorm = 0.6160, lr_0 = 5.7793e-04
Loss = 1.5216e-01, PNorm = 69.3624, GNorm = 0.8007, lr_0 = 5.7753e-04
Loss = 1.3300e-01, PNorm = 69.3768, GNorm = 1.1767, lr_0 = 5.7714e-04
Loss = 1.4454e-01, PNorm = 69.3984, GNorm = 0.8613, lr_0 = 5.7674e-04
Loss = 1.4907e-01, PNorm = 69.4128, GNorm = 0.7783, lr_0 = 5.7635e-04
Loss = 1.5342e-01, PNorm = 69.4330, GNorm = 0.6579, lr_0 = 5.7595e-04
Loss = 1.4777e-01, PNorm = 69.4559, GNorm = 0.6822, lr_0 = 5.7556e-04
Loss = 1.5236e-01, PNorm = 69.4715, GNorm = 1.0657, lr_0 = 5.7516e-04
Loss = 1.3994e-01, PNorm = 69.4861, GNorm = 0.8145, lr_0 = 5.7477e-04
Loss = 1.3560e-01, PNorm = 69.5048, GNorm = 0.7901, lr_0 = 5.7438e-04
Loss = 1.2692e-01, PNorm = 69.5185, GNorm = 0.5507, lr_0 = 5.7398e-04
Loss = 1.4307e-01, PNorm = 69.5329, GNorm = 0.5779, lr_0 = 5.7359e-04
Loss = 1.3109e-01, PNorm = 69.5537, GNorm = 0.5336, lr_0 = 5.7320e-04
Loss = 1.4018e-01, PNorm = 69.5647, GNorm = 0.5865, lr_0 = 5.7280e-04
Loss = 1.5353e-01, PNorm = 69.5791, GNorm = 0.7284, lr_0 = 5.7241e-04
Loss = 1.3474e-01, PNorm = 69.5985, GNorm = 0.9028, lr_0 = 5.7202e-04
Loss = 1.4780e-01, PNorm = 69.6150, GNorm = 0.9192, lr_0 = 5.7163e-04
Loss = 1.7576e-01, PNorm = 69.6349, GNorm = 0.9041, lr_0 = 5.7124e-04
Loss = 1.3754e-01, PNorm = 69.6513, GNorm = 0.6768, lr_0 = 5.7084e-04
Loss = 1.5642e-01, PNorm = 69.6692, GNorm = 0.9657, lr_0 = 5.7045e-04
Loss = 1.4376e-01, PNorm = 69.6877, GNorm = 0.6103, lr_0 = 5.7006e-04
Loss = 1.3804e-01, PNorm = 69.7005, GNorm = 0.7128, lr_0 = 5.6967e-04
Loss = 1.4164e-01, PNorm = 69.7079, GNorm = 0.6029, lr_0 = 5.6928e-04
Loss = 1.4404e-01, PNorm = 69.7199, GNorm = 0.5760, lr_0 = 5.6889e-04
Loss = 1.4460e-01, PNorm = 69.7375, GNorm = 0.6557, lr_0 = 5.6850e-04
Loss = 1.3983e-01, PNorm = 69.7606, GNorm = 0.7698, lr_0 = 5.6811e-04
Loss = 1.6770e-01, PNorm = 69.7818, GNorm = 0.6927, lr_0 = 5.6772e-04
Loss = 1.5987e-01, PNorm = 69.7968, GNorm = 0.7229, lr_0 = 5.6733e-04
Loss = 1.5809e-01, PNorm = 69.8088, GNorm = 0.5928, lr_0 = 5.6695e-04
Loss = 1.5188e-01, PNorm = 69.8182, GNorm = 0.6743, lr_0 = 5.6656e-04
Loss = 1.4696e-01, PNorm = 69.8318, GNorm = 0.8518, lr_0 = 5.6617e-04
Loss = 1.2741e-01, PNorm = 69.8588, GNorm = 0.7222, lr_0 = 5.6578e-04
Loss = 1.4078e-01, PNorm = 69.8797, GNorm = 0.7459, lr_0 = 5.6539e-04
Loss = 1.4444e-01, PNorm = 69.8999, GNorm = 0.6379, lr_0 = 5.6501e-04
Loss = 1.7924e-01, PNorm = 69.9180, GNorm = 1.2518, lr_0 = 5.6462e-04
Loss = 1.3208e-01, PNorm = 69.9381, GNorm = 0.8128, lr_0 = 5.6423e-04
Loss = 1.4593e-01, PNorm = 69.9571, GNorm = 0.7716, lr_0 = 5.6385e-04
Loss = 1.4879e-01, PNorm = 69.9664, GNorm = 0.6596, lr_0 = 5.6346e-04
Loss = 1.4415e-01, PNorm = 69.9801, GNorm = 1.3023, lr_0 = 5.6307e-04
Loss = 1.5270e-01, PNorm = 69.9887, GNorm = 0.7186, lr_0 = 5.6269e-04
Loss = 1.4385e-01, PNorm = 70.0084, GNorm = 0.6049, lr_0 = 5.6230e-04
Validation mae = 0.246122
Epoch 9
Loss = 1.4107e-01, PNorm = 70.0270, GNorm = 0.7041, lr_0 = 5.6192e-04
Loss = 1.3118e-01, PNorm = 70.0465, GNorm = 1.2779, lr_0 = 5.6153e-04
Loss = 1.3832e-01, PNorm = 70.0643, GNorm = 0.8149, lr_0 = 5.6115e-04
Loss = 1.4478e-01, PNorm = 70.0826, GNorm = 1.0568, lr_0 = 5.6076e-04
Loss = 1.2480e-01, PNorm = 70.0990, GNorm = 0.5756, lr_0 = 5.6038e-04
Loss = 1.4609e-01, PNorm = 70.1192, GNorm = 0.6358, lr_0 = 5.6000e-04
Loss = 1.5006e-01, PNorm = 70.1410, GNorm = 0.8110, lr_0 = 5.5961e-04
Loss = 1.2903e-01, PNorm = 70.1668, GNorm = 0.7163, lr_0 = 5.5923e-04
Loss = 1.3927e-01, PNorm = 70.1822, GNorm = 1.2727, lr_0 = 5.5885e-04
Loss = 1.3751e-01, PNorm = 70.1969, GNorm = 0.9994, lr_0 = 5.5846e-04
Loss = 1.3893e-01, PNorm = 70.2223, GNorm = 0.6174, lr_0 = 5.5808e-04
Loss = 1.3393e-01, PNorm = 70.2477, GNorm = 0.6203, lr_0 = 5.5770e-04
Loss = 1.3534e-01, PNorm = 70.2732, GNorm = 0.8218, lr_0 = 5.5732e-04
Loss = 1.3111e-01, PNorm = 70.2959, GNorm = 0.7999, lr_0 = 5.5693e-04
Loss = 1.3933e-01, PNorm = 70.3181, GNorm = 0.7214, lr_0 = 5.5655e-04
Loss = 1.4586e-01, PNorm = 70.3412, GNorm = 1.0801, lr_0 = 5.5617e-04
Loss = 1.3108e-01, PNorm = 70.3591, GNorm = 0.5796, lr_0 = 5.5579e-04
Loss = 1.4682e-01, PNorm = 70.3721, GNorm = 1.1092, lr_0 = 5.5541e-04
Loss = 1.2482e-01, PNorm = 70.3858, GNorm = 0.8646, lr_0 = 5.5503e-04
Loss = 1.5109e-01, PNorm = 70.4054, GNorm = 0.7241, lr_0 = 5.5465e-04
Loss = 1.3777e-01, PNorm = 70.4262, GNorm = 0.7215, lr_0 = 5.5427e-04
Loss = 1.4530e-01, PNorm = 70.4426, GNorm = 0.5212, lr_0 = 5.5389e-04
Loss = 1.7207e-01, PNorm = 70.4625, GNorm = 1.3533, lr_0 = 5.5351e-04
Loss = 1.4226e-01, PNorm = 70.4801, GNorm = 0.8691, lr_0 = 5.5313e-04
Loss = 1.3553e-01, PNorm = 70.5017, GNorm = 0.7215, lr_0 = 5.5275e-04
Loss = 1.2899e-01, PNorm = 70.5227, GNorm = 0.6962, lr_0 = 5.5237e-04
Loss = 1.3953e-01, PNorm = 70.5456, GNorm = 0.6176, lr_0 = 5.5199e-04
Loss = 1.4772e-01, PNorm = 70.5621, GNorm = 0.7676, lr_0 = 5.5162e-04
Loss = 1.4675e-01, PNorm = 70.5862, GNorm = 1.1862, lr_0 = 5.5124e-04
Loss = 1.3783e-01, PNorm = 70.6068, GNorm = 1.3216, lr_0 = 5.5086e-04
Loss = 1.4060e-01, PNorm = 70.6172, GNorm = 0.8354, lr_0 = 5.5048e-04
Loss = 1.7126e-01, PNorm = 70.6355, GNorm = 0.5997, lr_0 = 5.5011e-04
Loss = 1.4939e-01, PNorm = 70.6584, GNorm = 1.4791, lr_0 = 5.4973e-04
Loss = 1.4425e-01, PNorm = 70.6827, GNorm = 0.9350, lr_0 = 5.4935e-04
Loss = 1.4337e-01, PNorm = 70.7087, GNorm = 0.6740, lr_0 = 5.4898e-04
Loss = 1.3662e-01, PNorm = 70.7291, GNorm = 0.5874, lr_0 = 5.4860e-04
Loss = 1.4816e-01, PNorm = 70.7461, GNorm = 0.5747, lr_0 = 5.4822e-04
Loss = 1.4364e-01, PNorm = 70.7620, GNorm = 0.6575, lr_0 = 5.4785e-04
Loss = 1.3283e-01, PNorm = 70.7785, GNorm = 0.8321, lr_0 = 5.4747e-04
Loss = 1.3548e-01, PNorm = 70.7917, GNorm = 0.9669, lr_0 = 5.4710e-04
Loss = 1.3046e-01, PNorm = 70.8048, GNorm = 0.7927, lr_0 = 5.4672e-04
Loss = 1.2351e-01, PNorm = 70.8215, GNorm = 0.9741, lr_0 = 5.4635e-04
Loss = 1.3978e-01, PNorm = 70.8427, GNorm = 0.6154, lr_0 = 5.4597e-04
Loss = 1.3552e-01, PNorm = 70.8600, GNorm = 0.5651, lr_0 = 5.4560e-04
Loss = 1.4191e-01, PNorm = 70.8762, GNorm = 0.5418, lr_0 = 5.4523e-04
Loss = 1.4992e-01, PNorm = 70.8961, GNorm = 0.7108, lr_0 = 5.4485e-04
Loss = 1.5575e-01, PNorm = 70.9189, GNorm = 1.0980, lr_0 = 5.4448e-04
Loss = 1.2720e-01, PNorm = 70.9407, GNorm = 0.8773, lr_0 = 5.4411e-04
Loss = 1.2720e-01, PNorm = 70.9575, GNorm = 0.4321, lr_0 = 5.4373e-04
Loss = 1.4249e-01, PNorm = 70.9703, GNorm = 0.5669, lr_0 = 5.4336e-04
Loss = 1.3022e-01, PNorm = 70.9874, GNorm = 0.7074, lr_0 = 5.4299e-04
Loss = 1.3912e-01, PNorm = 70.9961, GNorm = 0.4638, lr_0 = 5.4262e-04
Loss = 1.4406e-01, PNorm = 71.0026, GNorm = 1.3058, lr_0 = 5.4225e-04
Loss = 1.4584e-01, PNorm = 71.0154, GNorm = 0.6823, lr_0 = 5.4187e-04
Loss = 1.4209e-01, PNorm = 71.0342, GNorm = 1.0617, lr_0 = 5.4150e-04
Loss = 1.2496e-01, PNorm = 71.0524, GNorm = 0.5838, lr_0 = 5.4113e-04
Loss = 1.3209e-01, PNorm = 71.0722, GNorm = 0.7246, lr_0 = 5.4076e-04
Loss = 1.4070e-01, PNorm = 71.0827, GNorm = 0.8297, lr_0 = 5.4039e-04
Loss = 1.6034e-01, PNorm = 71.0935, GNorm = 0.8134, lr_0 = 5.4002e-04
Loss = 1.3892e-01, PNorm = 71.1131, GNorm = 0.6827, lr_0 = 5.3965e-04
Loss = 1.2382e-01, PNorm = 71.1298, GNorm = 0.7051, lr_0 = 5.3928e-04
Loss = 1.3276e-01, PNorm = 71.1446, GNorm = 0.7645, lr_0 = 5.3891e-04
Loss = 1.6494e-01, PNorm = 71.1612, GNorm = 0.6423, lr_0 = 5.3854e-04
Loss = 1.5034e-01, PNorm = 71.1793, GNorm = 1.0142, lr_0 = 5.3817e-04
Loss = 1.4630e-01, PNorm = 71.1936, GNorm = 1.1066, lr_0 = 5.3781e-04
Loss = 1.3614e-01, PNorm = 71.2196, GNorm = 1.0274, lr_0 = 5.3744e-04
Loss = 1.3436e-01, PNorm = 71.2344, GNorm = 1.1597, lr_0 = 5.3707e-04
Loss = 1.4355e-01, PNorm = 71.2434, GNorm = 0.8031, lr_0 = 5.3670e-04
Loss = 1.4264e-01, PNorm = 71.2587, GNorm = 0.9664, lr_0 = 5.3633e-04
Loss = 1.2777e-01, PNorm = 71.2695, GNorm = 0.9735, lr_0 = 5.3597e-04
Loss = 1.1502e-01, PNorm = 71.2805, GNorm = 0.7642, lr_0 = 5.3560e-04
Loss = 1.2440e-01, PNorm = 71.2970, GNorm = 0.5600, lr_0 = 5.3523e-04
Loss = 1.5358e-01, PNorm = 71.3184, GNorm = 0.5770, lr_0 = 5.3486e-04
Loss = 1.2531e-01, PNorm = 71.3288, GNorm = 0.7117, lr_0 = 5.3450e-04
Loss = 1.2946e-01, PNorm = 71.3356, GNorm = 1.1973, lr_0 = 5.3413e-04
Loss = 1.5637e-01, PNorm = 71.3443, GNorm = 0.5130, lr_0 = 5.3377e-04
Loss = 1.4230e-01, PNorm = 71.3618, GNorm = 0.5720, lr_0 = 5.3340e-04
Loss = 1.4333e-01, PNorm = 71.3830, GNorm = 1.0792, lr_0 = 5.3304e-04
Loss = 1.5086e-01, PNorm = 71.4042, GNorm = 0.7432, lr_0 = 5.3267e-04
Loss = 1.3935e-01, PNorm = 71.4169, GNorm = 0.3607, lr_0 = 5.3231e-04
Loss = 1.4709e-01, PNorm = 71.4283, GNorm = 0.7618, lr_0 = 5.3194e-04
Loss = 1.3740e-01, PNorm = 71.4381, GNorm = 0.7365, lr_0 = 5.3158e-04
Loss = 1.4075e-01, PNorm = 71.4516, GNorm = 0.6742, lr_0 = 5.3121e-04
Loss = 1.5665e-01, PNorm = 71.4641, GNorm = 1.3905, lr_0 = 5.3085e-04
Loss = 1.3587e-01, PNorm = 71.4893, GNorm = 0.7227, lr_0 = 5.3048e-04
Loss = 1.4583e-01, PNorm = 71.5079, GNorm = 1.3350, lr_0 = 5.3012e-04
Loss = 1.3496e-01, PNorm = 71.5295, GNorm = 0.9803, lr_0 = 5.2976e-04
Loss = 1.3817e-01, PNorm = 71.5442, GNorm = 0.8862, lr_0 = 5.2939e-04
Loss = 1.4387e-01, PNorm = 71.5616, GNorm = 1.0026, lr_0 = 5.2903e-04
Loss = 1.4659e-01, PNorm = 71.5749, GNorm = 0.5094, lr_0 = 5.2867e-04
Loss = 1.5428e-01, PNorm = 71.5934, GNorm = 1.3172, lr_0 = 5.2831e-04
Loss = 1.5239e-01, PNorm = 71.6083, GNorm = 0.8687, lr_0 = 5.2795e-04
Loss = 1.4425e-01, PNorm = 71.6288, GNorm = 0.7550, lr_0 = 5.2758e-04
Loss = 1.4852e-01, PNorm = 71.6388, GNorm = 0.8092, lr_0 = 5.2722e-04
Loss = 1.4018e-01, PNorm = 71.6566, GNorm = 1.1540, lr_0 = 5.2686e-04
Loss = 1.2210e-01, PNorm = 71.6778, GNorm = 0.8012, lr_0 = 5.2650e-04
Loss = 1.4569e-01, PNorm = 71.6939, GNorm = 0.6852, lr_0 = 5.2614e-04
Loss = 1.4772e-01, PNorm = 71.7019, GNorm = 0.7682, lr_0 = 5.2578e-04
Loss = 1.3213e-01, PNorm = 71.7159, GNorm = 0.9175, lr_0 = 5.2542e-04
Loss = 1.2782e-01, PNorm = 71.7344, GNorm = 0.7206, lr_0 = 5.2506e-04
Loss = 1.4219e-01, PNorm = 71.7448, GNorm = 0.7531, lr_0 = 5.2470e-04
Loss = 1.2155e-01, PNorm = 71.7594, GNorm = 0.5054, lr_0 = 5.2434e-04
Loss = 1.5643e-01, PNorm = 71.7741, GNorm = 0.9862, lr_0 = 5.2398e-04
Loss = 1.4038e-01, PNorm = 71.7853, GNorm = 0.5633, lr_0 = 5.2362e-04
Loss = 1.4545e-01, PNorm = 71.8012, GNorm = 0.5562, lr_0 = 5.2326e-04
Loss = 1.4003e-01, PNorm = 71.8144, GNorm = 0.6552, lr_0 = 5.2290e-04
Loss = 1.3848e-01, PNorm = 71.8310, GNorm = 0.6804, lr_0 = 5.2255e-04
Loss = 1.3394e-01, PNorm = 71.8470, GNorm = 0.5247, lr_0 = 5.2219e-04
Loss = 1.4888e-01, PNorm = 71.8667, GNorm = 1.6987, lr_0 = 5.2183e-04
Loss = 1.4298e-01, PNorm = 71.8731, GNorm = 0.6043, lr_0 = 5.2147e-04
Loss = 1.4603e-01, PNorm = 71.8892, GNorm = 0.7640, lr_0 = 5.2112e-04
Loss = 1.4364e-01, PNorm = 71.9057, GNorm = 0.6452, lr_0 = 5.2076e-04
Loss = 1.3499e-01, PNorm = 71.9202, GNorm = 1.4962, lr_0 = 5.2040e-04
Loss = 1.6780e-01, PNorm = 71.9309, GNorm = 1.0596, lr_0 = 5.2005e-04
Loss = 1.4705e-01, PNorm = 71.9368, GNorm = 0.9324, lr_0 = 5.1969e-04
Loss = 1.5559e-01, PNorm = 71.9563, GNorm = 1.2120, lr_0 = 5.1933e-04
Loss = 1.5446e-01, PNorm = 71.9721, GNorm = 0.6256, lr_0 = 5.1898e-04
Loss = 1.3584e-01, PNorm = 71.9890, GNorm = 0.5848, lr_0 = 5.1862e-04
Loss = 1.4547e-01, PNorm = 72.0019, GNorm = 0.5493, lr_0 = 5.1827e-04
Loss = 1.3885e-01, PNorm = 72.0188, GNorm = 0.6678, lr_0 = 5.1791e-04
Validation mae = 0.240505
Epoch 10
Loss = 1.2377e-01, PNorm = 72.0381, GNorm = 0.8347, lr_0 = 5.1756e-04
Loss = 1.4086e-01, PNorm = 72.0549, GNorm = 0.7319, lr_0 = 5.1720e-04
Loss = 1.1464e-01, PNorm = 72.0713, GNorm = 0.5211, lr_0 = 5.1685e-04
Loss = 1.3511e-01, PNorm = 72.0784, GNorm = 1.1231, lr_0 = 5.1649e-04
Loss = 1.3162e-01, PNorm = 72.0911, GNorm = 1.1283, lr_0 = 5.1614e-04
Loss = 1.2707e-01, PNorm = 72.1073, GNorm = 0.6562, lr_0 = 5.1579e-04
Loss = 1.4734e-01, PNorm = 72.1191, GNorm = 0.7256, lr_0 = 5.1543e-04
Loss = 1.4310e-01, PNorm = 72.1375, GNorm = 0.6035, lr_0 = 5.1508e-04
Loss = 1.0539e-01, PNorm = 72.1527, GNorm = 0.7656, lr_0 = 5.1473e-04
Loss = 1.1722e-01, PNorm = 72.1677, GNorm = 0.7559, lr_0 = 5.1437e-04
Loss = 1.2036e-01, PNorm = 72.1807, GNorm = 0.5059, lr_0 = 5.1402e-04
Loss = 1.4168e-01, PNorm = 72.1998, GNorm = 0.6739, lr_0 = 5.1367e-04
Loss = 1.4035e-01, PNorm = 72.2173, GNorm = 0.6846, lr_0 = 5.1332e-04
Loss = 1.3775e-01, PNorm = 72.2405, GNorm = 1.1066, lr_0 = 5.1297e-04
Loss = 1.3625e-01, PNorm = 72.2586, GNorm = 0.6424, lr_0 = 5.1262e-04
Loss = 1.3294e-01, PNorm = 72.2711, GNorm = 0.4599, lr_0 = 5.1226e-04
Loss = 1.3224e-01, PNorm = 72.2852, GNorm = 0.7859, lr_0 = 5.1191e-04
Loss = 1.1622e-01, PNorm = 72.2999, GNorm = 0.7107, lr_0 = 5.1156e-04
Loss = 1.2114e-01, PNorm = 72.3186, GNorm = 0.7049, lr_0 = 5.1121e-04
Loss = 1.2370e-01, PNorm = 72.3370, GNorm = 1.0445, lr_0 = 5.1086e-04
Loss = 1.2927e-01, PNorm = 72.3516, GNorm = 1.0212, lr_0 = 5.1051e-04
Loss = 1.3671e-01, PNorm = 72.3645, GNorm = 0.7254, lr_0 = 5.1016e-04
Loss = 1.2515e-01, PNorm = 72.3741, GNorm = 0.8129, lr_0 = 5.0981e-04
Loss = 1.3802e-01, PNorm = 72.3902, GNorm = 0.7140, lr_0 = 5.0946e-04
Loss = 1.4016e-01, PNorm = 72.4029, GNorm = 0.7645, lr_0 = 5.0911e-04
Loss = 1.2269e-01, PNorm = 72.4167, GNorm = 0.6125, lr_0 = 5.0877e-04
Loss = 1.3696e-01, PNorm = 72.4292, GNorm = 0.7022, lr_0 = 5.0842e-04
Loss = 1.2132e-01, PNorm = 72.4460, GNorm = 0.7936, lr_0 = 5.0807e-04
Loss = 1.2915e-01, PNorm = 72.4616, GNorm = 0.5545, lr_0 = 5.0772e-04
Loss = 1.2330e-01, PNorm = 72.4800, GNorm = 0.6586, lr_0 = 5.0737e-04
Loss = 1.6638e-01, PNorm = 72.4914, GNorm = 0.6775, lr_0 = 5.0703e-04
Loss = 1.2609e-01, PNorm = 72.5115, GNorm = 0.8405, lr_0 = 5.0668e-04
Loss = 1.3951e-01, PNorm = 72.5208, GNorm = 0.6272, lr_0 = 5.0633e-04
Loss = 1.1851e-01, PNorm = 72.5323, GNorm = 0.7172, lr_0 = 5.0598e-04
Loss = 1.5088e-01, PNorm = 72.5436, GNorm = 0.6872, lr_0 = 5.0564e-04
Loss = 1.2219e-01, PNorm = 72.5593, GNorm = 0.8446, lr_0 = 5.0529e-04
Loss = 1.1550e-01, PNorm = 72.5782, GNorm = 0.5097, lr_0 = 5.0494e-04
Loss = 1.4499e-01, PNorm = 72.5945, GNorm = 0.7139, lr_0 = 5.0460e-04
Loss = 1.3975e-01, PNorm = 72.6131, GNorm = 0.6007, lr_0 = 5.0425e-04
Loss = 1.3964e-01, PNorm = 72.6224, GNorm = 0.9443, lr_0 = 5.0391e-04
Loss = 1.4861e-01, PNorm = 72.6357, GNorm = 0.6618, lr_0 = 5.0356e-04
Loss = 1.2456e-01, PNorm = 72.6463, GNorm = 0.5266, lr_0 = 5.0322e-04
Loss = 1.4335e-01, PNorm = 72.6560, GNorm = 0.5692, lr_0 = 5.0287e-04
Loss = 1.3279e-01, PNorm = 72.6659, GNorm = 0.8654, lr_0 = 5.0253e-04
Loss = 1.4214e-01, PNorm = 72.6817, GNorm = 1.0866, lr_0 = 5.0218e-04
Loss = 1.3123e-01, PNorm = 72.7009, GNorm = 0.4832, lr_0 = 5.0184e-04
Loss = 1.3916e-01, PNorm = 72.7192, GNorm = 0.6338, lr_0 = 5.0150e-04
Loss = 1.4265e-01, PNorm = 72.7354, GNorm = 0.9159, lr_0 = 5.0115e-04
Loss = 1.1973e-01, PNorm = 72.7557, GNorm = 0.8446, lr_0 = 5.0081e-04
Loss = 1.4633e-01, PNorm = 72.7669, GNorm = 0.7090, lr_0 = 5.0047e-04
Loss = 1.2737e-01, PNorm = 72.7849, GNorm = 0.5892, lr_0 = 5.0012e-04
Loss = 1.4421e-01, PNorm = 72.7928, GNorm = 0.9343, lr_0 = 4.9978e-04
Loss = 1.7166e-01, PNorm = 72.8077, GNorm = 0.7572, lr_0 = 4.9944e-04
Loss = 1.2894e-01, PNorm = 72.8203, GNorm = 0.7430, lr_0 = 4.9910e-04
Loss = 1.3477e-01, PNorm = 72.8379, GNorm = 0.9379, lr_0 = 4.9875e-04
Loss = 1.3591e-01, PNorm = 72.8542, GNorm = 0.6772, lr_0 = 4.9841e-04
Loss = 1.2939e-01, PNorm = 72.8765, GNorm = 0.7240, lr_0 = 4.9807e-04
Loss = 1.3696e-01, PNorm = 72.8880, GNorm = 0.8009, lr_0 = 4.9773e-04
Loss = 1.4937e-01, PNorm = 72.9045, GNorm = 0.5790, lr_0 = 4.9739e-04
Loss = 1.3970e-01, PNorm = 72.9192, GNorm = 0.9341, lr_0 = 4.9705e-04
Loss = 1.3303e-01, PNorm = 72.9273, GNorm = 1.0218, lr_0 = 4.9671e-04
Loss = 1.4826e-01, PNorm = 72.9418, GNorm = 0.7363, lr_0 = 4.9637e-04
Loss = 1.2134e-01, PNorm = 72.9599, GNorm = 0.6906, lr_0 = 4.9603e-04
Loss = 1.4771e-01, PNorm = 72.9735, GNorm = 0.7602, lr_0 = 4.9569e-04
Loss = 1.3114e-01, PNorm = 72.9910, GNorm = 1.0817, lr_0 = 4.9535e-04
Loss = 1.4607e-01, PNorm = 73.0073, GNorm = 0.7713, lr_0 = 4.9501e-04
Loss = 1.3276e-01, PNorm = 73.0212, GNorm = 1.1315, lr_0 = 4.9467e-04
Loss = 1.3513e-01, PNorm = 73.0357, GNorm = 0.6880, lr_0 = 4.9433e-04
Loss = 1.3243e-01, PNorm = 73.0470, GNorm = 0.5604, lr_0 = 4.9399e-04
Loss = 1.2058e-01, PNorm = 73.0641, GNorm = 0.6450, lr_0 = 4.9365e-04
Loss = 1.3580e-01, PNorm = 73.0752, GNorm = 0.8676, lr_0 = 4.9332e-04
Loss = 1.2108e-01, PNorm = 73.0816, GNorm = 0.5519, lr_0 = 4.9298e-04
Loss = 1.3589e-01, PNorm = 73.0922, GNorm = 0.6743, lr_0 = 4.9264e-04
Loss = 1.3270e-01, PNorm = 73.1048, GNorm = 1.1573, lr_0 = 4.9230e-04
Loss = 1.2737e-01, PNorm = 73.1144, GNorm = 0.8990, lr_0 = 4.9197e-04
Loss = 1.3454e-01, PNorm = 73.1321, GNorm = 0.6281, lr_0 = 4.9163e-04
Loss = 1.6166e-01, PNorm = 73.1507, GNorm = 0.6522, lr_0 = 4.9129e-04
Loss = 1.3214e-01, PNorm = 73.1713, GNorm = 0.9547, lr_0 = 4.9095e-04
Loss = 1.3516e-01, PNorm = 73.1873, GNorm = 0.8317, lr_0 = 4.9062e-04
Loss = 1.3512e-01, PNorm = 73.2038, GNorm = 0.8906, lr_0 = 4.9028e-04
Loss = 1.4425e-01, PNorm = 73.2228, GNorm = 0.8846, lr_0 = 4.8995e-04
Loss = 1.4019e-01, PNorm = 73.2335, GNorm = 0.7308, lr_0 = 4.8961e-04
Loss = 1.3140e-01, PNorm = 73.2473, GNorm = 0.6861, lr_0 = 4.8928e-04
Loss = 1.2862e-01, PNorm = 73.2653, GNorm = 0.5405, lr_0 = 4.8894e-04
Loss = 1.3874e-01, PNorm = 73.2802, GNorm = 0.6891, lr_0 = 4.8861e-04
Loss = 1.2711e-01, PNorm = 73.2903, GNorm = 0.5749, lr_0 = 4.8827e-04
Loss = 1.5424e-01, PNorm = 73.3010, GNorm = 0.7895, lr_0 = 4.8794e-04
Loss = 1.4005e-01, PNorm = 73.3131, GNorm = 0.6459, lr_0 = 4.8760e-04
Loss = 1.2662e-01, PNorm = 73.3205, GNorm = 1.0177, lr_0 = 4.8727e-04
Loss = 1.8082e-01, PNorm = 73.3307, GNorm = 0.8414, lr_0 = 4.8693e-04
Loss = 1.3025e-01, PNorm = 73.3430, GNorm = 0.5612, lr_0 = 4.8660e-04
Loss = 1.4859e-01, PNorm = 73.3541, GNorm = 1.3450, lr_0 = 4.8627e-04
Loss = 1.3037e-01, PNorm = 73.3718, GNorm = 0.5610, lr_0 = 4.8593e-04
Loss = 1.3277e-01, PNorm = 73.3841, GNorm = 0.6609, lr_0 = 4.8560e-04
Loss = 1.2257e-01, PNorm = 73.3955, GNorm = 0.9842, lr_0 = 4.8527e-04
Loss = 1.3704e-01, PNorm = 73.4029, GNorm = 1.0030, lr_0 = 4.8494e-04
Loss = 1.2807e-01, PNorm = 73.4140, GNorm = 0.4349, lr_0 = 4.8460e-04
Loss = 1.2267e-01, PNorm = 73.4341, GNorm = 0.7236, lr_0 = 4.8427e-04
Loss = 1.4819e-01, PNorm = 73.4469, GNorm = 0.7856, lr_0 = 4.8394e-04
Loss = 1.3582e-01, PNorm = 73.4618, GNorm = 1.2515, lr_0 = 4.8361e-04
Loss = 1.2946e-01, PNorm = 73.4733, GNorm = 0.7212, lr_0 = 4.8328e-04
Loss = 1.3032e-01, PNorm = 73.4859, GNorm = 0.8019, lr_0 = 4.8295e-04
Loss = 1.3205e-01, PNorm = 73.4939, GNorm = 0.6762, lr_0 = 4.8262e-04
Loss = 1.2016e-01, PNorm = 73.5057, GNorm = 0.6926, lr_0 = 4.8228e-04
Loss = 1.3824e-01, PNorm = 73.5200, GNorm = 1.1841, lr_0 = 4.8195e-04
Loss = 1.5010e-01, PNorm = 73.5279, GNorm = 1.0153, lr_0 = 4.8162e-04
Loss = 1.3327e-01, PNorm = 73.5432, GNorm = 1.0414, lr_0 = 4.8129e-04
Loss = 1.4770e-01, PNorm = 73.5576, GNorm = 1.1274, lr_0 = 4.8096e-04
Loss = 1.5125e-01, PNorm = 73.5813, GNorm = 0.9116, lr_0 = 4.8064e-04
Loss = 1.2042e-01, PNorm = 73.5945, GNorm = 1.0543, lr_0 = 4.8031e-04
Loss = 1.3562e-01, PNorm = 73.6055, GNorm = 0.5941, lr_0 = 4.7998e-04
Loss = 1.2208e-01, PNorm = 73.6201, GNorm = 0.7573, lr_0 = 4.7965e-04
Loss = 1.3896e-01, PNorm = 73.6327, GNorm = 0.8467, lr_0 = 4.7932e-04
Loss = 1.3642e-01, PNorm = 73.6496, GNorm = 0.9682, lr_0 = 4.7899e-04
Loss = 1.1997e-01, PNorm = 73.6655, GNorm = 1.1039, lr_0 = 4.7866e-04
Loss = 1.2259e-01, PNorm = 73.6800, GNorm = 0.8401, lr_0 = 4.7833e-04
Loss = 1.4031e-01, PNorm = 73.6885, GNorm = 1.0953, lr_0 = 4.7801e-04
Loss = 1.3180e-01, PNorm = 73.7040, GNorm = 0.6820, lr_0 = 4.7768e-04
Loss = 1.3805e-01, PNorm = 73.7193, GNorm = 0.6872, lr_0 = 4.7735e-04
Loss = 1.2977e-01, PNorm = 73.7301, GNorm = 0.7755, lr_0 = 4.7703e-04
Validation mae = 0.237353
Epoch 11
Loss = 1.3418e-01, PNorm = 73.7459, GNorm = 1.0348, lr_0 = 4.7670e-04
Loss = 1.3174e-01, PNorm = 73.7592, GNorm = 0.6419, lr_0 = 4.7637e-04
Loss = 1.2939e-01, PNorm = 73.7768, GNorm = 0.6122, lr_0 = 4.7605e-04
Loss = 1.1746e-01, PNorm = 73.7888, GNorm = 1.4618, lr_0 = 4.7572e-04
Loss = 1.4718e-01, PNorm = 73.8056, GNorm = 0.7628, lr_0 = 4.7539e-04
Loss = 1.1555e-01, PNorm = 73.8211, GNorm = 0.8299, lr_0 = 4.7507e-04
Loss = 1.2502e-01, PNorm = 73.8377, GNorm = 0.5840, lr_0 = 4.7474e-04
Loss = 1.2103e-01, PNorm = 73.8516, GNorm = 0.6474, lr_0 = 4.7442e-04
Loss = 1.2560e-01, PNorm = 73.8666, GNorm = 0.6526, lr_0 = 4.7409e-04
Loss = 1.3257e-01, PNorm = 73.8770, GNorm = 0.8180, lr_0 = 4.7377e-04
Loss = 1.3848e-01, PNorm = 73.8895, GNorm = 0.5107, lr_0 = 4.7344e-04
Loss = 1.2803e-01, PNorm = 73.8992, GNorm = 0.8014, lr_0 = 4.7312e-04
Loss = 1.3363e-01, PNorm = 73.9172, GNorm = 0.7447, lr_0 = 4.7279e-04
Loss = 1.3970e-01, PNorm = 73.9323, GNorm = 0.7337, lr_0 = 4.7247e-04
Loss = 1.2307e-01, PNorm = 73.9499, GNorm = 0.7835, lr_0 = 4.7215e-04
Loss = 1.3746e-01, PNorm = 73.9656, GNorm = 1.0150, lr_0 = 4.7182e-04
Loss = 1.0909e-01, PNorm = 73.9760, GNorm = 0.5721, lr_0 = 4.7150e-04
Loss = 1.4306e-01, PNorm = 73.9898, GNorm = 0.7128, lr_0 = 4.7118e-04
Loss = 1.1263e-01, PNorm = 74.0031, GNorm = 0.5762, lr_0 = 4.7085e-04
Loss = 1.2956e-01, PNorm = 74.0147, GNorm = 0.6806, lr_0 = 4.7053e-04
Loss = 1.1837e-01, PNorm = 74.0293, GNorm = 1.0921, lr_0 = 4.7021e-04
Loss = 1.3417e-01, PNorm = 74.0448, GNorm = 0.6358, lr_0 = 4.6989e-04
Loss = 1.4117e-01, PNorm = 74.0638, GNorm = 0.5812, lr_0 = 4.6957e-04
Loss = 1.3059e-01, PNorm = 74.0758, GNorm = 0.8292, lr_0 = 4.6924e-04
Loss = 1.2179e-01, PNorm = 74.0864, GNorm = 0.5365, lr_0 = 4.6892e-04
Loss = 1.1723e-01, PNorm = 74.1001, GNorm = 0.9629, lr_0 = 4.6860e-04
Loss = 1.2260e-01, PNorm = 74.1145, GNorm = 0.8849, lr_0 = 4.6828e-04
Loss = 1.2070e-01, PNorm = 74.1282, GNorm = 0.8508, lr_0 = 4.6796e-04
Loss = 1.2772e-01, PNorm = 74.1474, GNorm = 0.8771, lr_0 = 4.6764e-04
Loss = 1.3497e-01, PNorm = 74.1584, GNorm = 0.7844, lr_0 = 4.6732e-04
Loss = 1.2165e-01, PNorm = 74.1693, GNorm = 0.4126, lr_0 = 4.6700e-04
Loss = 1.1318e-01, PNorm = 74.1756, GNorm = 0.5195, lr_0 = 4.6668e-04
Loss = 1.2501e-01, PNorm = 74.1894, GNorm = 1.2688, lr_0 = 4.6636e-04
Loss = 1.1883e-01, PNorm = 74.2025, GNorm = 0.6284, lr_0 = 4.6604e-04
Loss = 1.1019e-01, PNorm = 74.2185, GNorm = 0.8909, lr_0 = 4.6572e-04
Loss = 1.5159e-01, PNorm = 74.2325, GNorm = 0.8006, lr_0 = 4.6540e-04
Loss = 1.2653e-01, PNorm = 74.2459, GNorm = 0.8475, lr_0 = 4.6508e-04
Loss = 1.3448e-01, PNorm = 74.2588, GNorm = 1.2592, lr_0 = 4.6476e-04
Loss = 1.2109e-01, PNorm = 74.2750, GNorm = 0.9927, lr_0 = 4.6445e-04
Loss = 1.2444e-01, PNorm = 74.2942, GNorm = 0.8983, lr_0 = 4.6413e-04
Loss = 1.2821e-01, PNorm = 74.3126, GNorm = 0.8456, lr_0 = 4.6381e-04
Loss = 1.1952e-01, PNorm = 74.3329, GNorm = 0.7453, lr_0 = 4.6349e-04
Loss = 1.1886e-01, PNorm = 74.3470, GNorm = 0.4929, lr_0 = 4.6317e-04
Loss = 1.2739e-01, PNorm = 74.3591, GNorm = 0.7292, lr_0 = 4.6286e-04
Loss = 1.2274e-01, PNorm = 74.3720, GNorm = 0.7582, lr_0 = 4.6254e-04
Loss = 1.3017e-01, PNorm = 74.3801, GNorm = 0.6982, lr_0 = 4.6222e-04
Loss = 1.4241e-01, PNorm = 74.3901, GNorm = 1.1047, lr_0 = 4.6191e-04
Loss = 1.2615e-01, PNorm = 74.4041, GNorm = 1.0823, lr_0 = 4.6159e-04
Loss = 1.1046e-01, PNorm = 74.4156, GNorm = 0.5992, lr_0 = 4.6127e-04
Loss = 1.3870e-01, PNorm = 74.4292, GNorm = 0.6939, lr_0 = 4.6096e-04
Loss = 1.1795e-01, PNorm = 74.4458, GNorm = 0.7945, lr_0 = 4.6064e-04
Loss = 1.2925e-01, PNorm = 74.4598, GNorm = 1.0399, lr_0 = 4.6033e-04
Loss = 1.2156e-01, PNorm = 74.4720, GNorm = 0.5877, lr_0 = 4.6001e-04
Loss = 1.1971e-01, PNorm = 74.4865, GNorm = 0.8762, lr_0 = 4.5970e-04
Loss = 1.2210e-01, PNorm = 74.4990, GNorm = 0.5491, lr_0 = 4.5938e-04
Loss = 1.1997e-01, PNorm = 74.5141, GNorm = 1.2524, lr_0 = 4.5907e-04
Loss = 1.3801e-01, PNorm = 74.5268, GNorm = 0.7594, lr_0 = 4.5875e-04
Loss = 1.4137e-01, PNorm = 74.5382, GNorm = 0.9569, lr_0 = 4.5844e-04
Loss = 1.3349e-01, PNorm = 74.5568, GNorm = 0.6538, lr_0 = 4.5812e-04
Loss = 1.2151e-01, PNorm = 74.5689, GNorm = 1.1821, lr_0 = 4.5781e-04
Loss = 1.2035e-01, PNorm = 74.5835, GNorm = 0.5494, lr_0 = 4.5750e-04
Loss = 1.4069e-01, PNorm = 74.5932, GNorm = 0.7780, lr_0 = 4.5718e-04
Loss = 1.2400e-01, PNorm = 74.6019, GNorm = 0.6124, lr_0 = 4.5687e-04
Loss = 1.1114e-01, PNorm = 74.6070, GNorm = 0.5442, lr_0 = 4.5656e-04
Loss = 1.0544e-01, PNorm = 74.6180, GNorm = 0.5311, lr_0 = 4.5624e-04
Loss = 1.6470e-01, PNorm = 74.6297, GNorm = 0.6487, lr_0 = 4.5593e-04
Loss = 1.3137e-01, PNorm = 74.6438, GNorm = 0.7138, lr_0 = 4.5562e-04
Loss = 1.2493e-01, PNorm = 74.6543, GNorm = 0.4915, lr_0 = 4.5531e-04
Loss = 1.3668e-01, PNorm = 74.6735, GNorm = 0.6904, lr_0 = 4.5499e-04
Loss = 1.1546e-01, PNorm = 74.6855, GNorm = 0.7134, lr_0 = 4.5468e-04
Loss = 1.3116e-01, PNorm = 74.7011, GNorm = 0.9123, lr_0 = 4.5437e-04
Loss = 1.4245e-01, PNorm = 74.7117, GNorm = 0.6845, lr_0 = 4.5406e-04
Loss = 1.2172e-01, PNorm = 74.7251, GNorm = 1.0046, lr_0 = 4.5375e-04
Loss = 1.3378e-01, PNorm = 74.7343, GNorm = 0.7299, lr_0 = 4.5344e-04
Loss = 1.3622e-01, PNorm = 74.7505, GNorm = 0.7542, lr_0 = 4.5313e-04
Loss = 1.3443e-01, PNorm = 74.7637, GNorm = 0.7564, lr_0 = 4.5282e-04
Loss = 1.1640e-01, PNorm = 74.7730, GNorm = 0.7867, lr_0 = 4.5251e-04
Loss = 1.5268e-01, PNorm = 74.7855, GNorm = 0.9890, lr_0 = 4.5220e-04
Loss = 1.4473e-01, PNorm = 74.7999, GNorm = 0.7137, lr_0 = 4.5189e-04
Loss = 1.2997e-01, PNorm = 74.8126, GNorm = 0.8127, lr_0 = 4.5158e-04
Loss = 1.1640e-01, PNorm = 74.8294, GNorm = 1.0474, lr_0 = 4.5127e-04
Loss = 1.2203e-01, PNorm = 74.8413, GNorm = 0.5950, lr_0 = 4.5096e-04
Loss = 1.2805e-01, PNorm = 74.8557, GNorm = 0.5581, lr_0 = 4.5065e-04
Loss = 1.4237e-01, PNorm = 74.8666, GNorm = 0.7409, lr_0 = 4.5034e-04
Loss = 1.3482e-01, PNorm = 74.8767, GNorm = 1.0014, lr_0 = 4.5003e-04
Loss = 1.4208e-01, PNorm = 74.8859, GNorm = 0.8781, lr_0 = 4.4972e-04
Loss = 1.3148e-01, PNorm = 74.8999, GNorm = 0.7595, lr_0 = 4.4942e-04
Loss = 1.2694e-01, PNorm = 74.9185, GNorm = 0.6193, lr_0 = 4.4911e-04
Loss = 1.3352e-01, PNorm = 74.9302, GNorm = 0.6267, lr_0 = 4.4880e-04
Loss = 1.2189e-01, PNorm = 74.9424, GNorm = 0.9198, lr_0 = 4.4849e-04
Loss = 1.2221e-01, PNorm = 74.9473, GNorm = 1.0352, lr_0 = 4.4819e-04
Loss = 1.2586e-01, PNorm = 74.9533, GNorm = 0.5983, lr_0 = 4.4788e-04
Loss = 1.1840e-01, PNorm = 74.9633, GNorm = 0.6736, lr_0 = 4.4757e-04
Loss = 1.3583e-01, PNorm = 74.9716, GNorm = 0.6081, lr_0 = 4.4727e-04
Loss = 1.2925e-01, PNorm = 74.9844, GNorm = 0.7210, lr_0 = 4.4696e-04
Loss = 1.1055e-01, PNorm = 74.9909, GNorm = 0.7578, lr_0 = 4.4665e-04
Loss = 1.2526e-01, PNorm = 75.0018, GNorm = 0.6662, lr_0 = 4.4635e-04
Loss = 1.2940e-01, PNorm = 75.0169, GNorm = 0.6148, lr_0 = 4.4604e-04
Loss = 1.3690e-01, PNorm = 75.0353, GNorm = 0.4323, lr_0 = 4.4574e-04
Loss = 1.4806e-01, PNorm = 75.0461, GNorm = 0.9742, lr_0 = 4.4543e-04
Loss = 1.2076e-01, PNorm = 75.0602, GNorm = 1.1097, lr_0 = 4.4513e-04
Loss = 1.2774e-01, PNorm = 75.0685, GNorm = 0.6485, lr_0 = 4.4482e-04
Loss = 1.3793e-01, PNorm = 75.0788, GNorm = 0.6209, lr_0 = 4.4452e-04
Loss = 1.4462e-01, PNorm = 75.0922, GNorm = 0.8131, lr_0 = 4.4421e-04
Loss = 1.1940e-01, PNorm = 75.1073, GNorm = 1.0350, lr_0 = 4.4391e-04
Loss = 1.3605e-01, PNorm = 75.1203, GNorm = 0.5920, lr_0 = 4.4360e-04
Loss = 1.2632e-01, PNorm = 75.1327, GNorm = 1.2452, lr_0 = 4.4330e-04
Loss = 1.2875e-01, PNorm = 75.1506, GNorm = 1.2612, lr_0 = 4.4299e-04
Loss = 1.2689e-01, PNorm = 75.1650, GNorm = 0.8534, lr_0 = 4.4269e-04
Loss = 1.2425e-01, PNorm = 75.1769, GNorm = 0.5827, lr_0 = 4.4239e-04
Loss = 1.4049e-01, PNorm = 75.1884, GNorm = 0.9443, lr_0 = 4.4209e-04
Loss = 1.1676e-01, PNorm = 75.2057, GNorm = 0.5598, lr_0 = 4.4178e-04
Loss = 1.2389e-01, PNorm = 75.2277, GNorm = 0.8654, lr_0 = 4.4148e-04
Loss = 1.2046e-01, PNorm = 75.2395, GNorm = 0.7704, lr_0 = 4.4118e-04
Loss = 1.1717e-01, PNorm = 75.2504, GNorm = 0.8539, lr_0 = 4.4088e-04
Loss = 1.2082e-01, PNorm = 75.2616, GNorm = 0.6502, lr_0 = 4.4057e-04
Loss = 1.3038e-01, PNorm = 75.2714, GNorm = 0.4682, lr_0 = 4.4027e-04
Loss = 1.4181e-01, PNorm = 75.2864, GNorm = 0.6744, lr_0 = 4.3997e-04
Loss = 1.2077e-01, PNorm = 75.2948, GNorm = 0.7377, lr_0 = 4.3967e-04
Loss = 1.3633e-01, PNorm = 75.3041, GNorm = 0.8184, lr_0 = 4.3937e-04
Validation mae = 0.233615
Epoch 12
Loss = 1.1393e-01, PNorm = 75.3124, GNorm = 0.6945, lr_0 = 4.3907e-04
Loss = 1.2275e-01, PNorm = 75.3267, GNorm = 0.6593, lr_0 = 4.3877e-04
Loss = 1.2506e-01, PNorm = 75.3345, GNorm = 1.1612, lr_0 = 4.3846e-04
Loss = 1.2318e-01, PNorm = 75.3452, GNorm = 0.8214, lr_0 = 4.3816e-04
Loss = 1.1258e-01, PNorm = 75.3587, GNorm = 0.9401, lr_0 = 4.3786e-04
Loss = 1.1650e-01, PNorm = 75.3673, GNorm = 0.8776, lr_0 = 4.3756e-04
Loss = 1.2294e-01, PNorm = 75.3781, GNorm = 0.6582, lr_0 = 4.3726e-04
Loss = 1.1098e-01, PNorm = 75.3898, GNorm = 0.6490, lr_0 = 4.3696e-04
Loss = 1.1110e-01, PNorm = 75.4033, GNorm = 0.6402, lr_0 = 4.3667e-04
Loss = 1.2457e-01, PNorm = 75.4184, GNorm = 0.5797, lr_0 = 4.3637e-04
Loss = 1.1026e-01, PNorm = 75.4298, GNorm = 0.7749, lr_0 = 4.3607e-04
Loss = 1.2679e-01, PNorm = 75.4405, GNorm = 0.8448, lr_0 = 4.3577e-04
Loss = 1.2618e-01, PNorm = 75.4584, GNorm = 0.6305, lr_0 = 4.3547e-04
Loss = 1.3341e-01, PNorm = 75.4731, GNorm = 1.0229, lr_0 = 4.3517e-04
Loss = 1.2498e-01, PNorm = 75.4921, GNorm = 0.5917, lr_0 = 4.3487e-04
Loss = 1.2204e-01, PNorm = 75.5102, GNorm = 0.5199, lr_0 = 4.3458e-04
Loss = 1.2333e-01, PNorm = 75.5234, GNorm = 0.4705, lr_0 = 4.3428e-04
Loss = 1.2512e-01, PNorm = 75.5359, GNorm = 0.8880, lr_0 = 4.3398e-04
Loss = 1.3804e-01, PNorm = 75.5454, GNorm = 0.6110, lr_0 = 4.3368e-04
Loss = 1.1706e-01, PNorm = 75.5531, GNorm = 0.5678, lr_0 = 4.3339e-04
Loss = 1.2009e-01, PNorm = 75.5626, GNorm = 0.5735, lr_0 = 4.3309e-04
Loss = 1.2461e-01, PNorm = 75.5766, GNorm = 0.8605, lr_0 = 4.3279e-04
Loss = 1.1622e-01, PNorm = 75.5857, GNorm = 1.0687, lr_0 = 4.3250e-04
Loss = 1.1100e-01, PNorm = 75.5925, GNorm = 0.7047, lr_0 = 4.3220e-04
Loss = 1.0844e-01, PNorm = 75.6053, GNorm = 0.5637, lr_0 = 4.3190e-04
Loss = 1.1909e-01, PNorm = 75.6219, GNorm = 1.3335, lr_0 = 4.3161e-04
Loss = 1.0560e-01, PNorm = 75.6390, GNorm = 0.6877, lr_0 = 4.3131e-04
Loss = 1.2951e-01, PNorm = 75.6511, GNorm = 0.6331, lr_0 = 4.3102e-04
Loss = 1.0789e-01, PNorm = 75.6658, GNorm = 0.7962, lr_0 = 4.3072e-04
Loss = 1.4079e-01, PNorm = 75.6831, GNorm = 0.7843, lr_0 = 4.3043e-04
Loss = 1.1319e-01, PNorm = 75.6991, GNorm = 0.8474, lr_0 = 4.3013e-04
Loss = 1.2911e-01, PNorm = 75.7136, GNorm = 0.7266, lr_0 = 4.2984e-04
Loss = 1.0514e-01, PNorm = 75.7210, GNorm = 0.6350, lr_0 = 4.2954e-04
Loss = 1.3023e-01, PNorm = 75.7291, GNorm = 0.4889, lr_0 = 4.2925e-04
Loss = 1.1928e-01, PNorm = 75.7386, GNorm = 0.6781, lr_0 = 4.2895e-04
Loss = 1.1973e-01, PNorm = 75.7420, GNorm = 0.6211, lr_0 = 4.2866e-04
Loss = 1.2561e-01, PNorm = 75.7567, GNorm = 1.2534, lr_0 = 4.2837e-04
Loss = 1.4730e-01, PNorm = 75.7724, GNorm = 0.7026, lr_0 = 4.2807e-04
Loss = 1.4026e-01, PNorm = 75.7856, GNorm = 0.8989, lr_0 = 4.2778e-04
Loss = 1.2000e-01, PNorm = 75.8030, GNorm = 1.2958, lr_0 = 4.2749e-04
Loss = 1.1438e-01, PNorm = 75.8175, GNorm = 0.4777, lr_0 = 4.2719e-04
Loss = 1.2346e-01, PNorm = 75.8338, GNorm = 0.8905, lr_0 = 4.2690e-04
Loss = 1.3471e-01, PNorm = 75.8466, GNorm = 1.3200, lr_0 = 4.2661e-04
Loss = 1.4581e-01, PNorm = 75.8627, GNorm = 0.6475, lr_0 = 4.2632e-04
Loss = 1.0568e-01, PNorm = 75.8764, GNorm = 0.5403, lr_0 = 4.2602e-04
Loss = 1.2061e-01, PNorm = 75.8892, GNorm = 0.7446, lr_0 = 4.2573e-04
Loss = 1.1179e-01, PNorm = 75.8943, GNorm = 0.6171, lr_0 = 4.2544e-04
Loss = 1.2306e-01, PNorm = 75.9072, GNorm = 0.8080, lr_0 = 4.2515e-04
Loss = 1.1007e-01, PNorm = 75.9147, GNorm = 0.6840, lr_0 = 4.2486e-04
Loss = 1.2208e-01, PNorm = 75.9222, GNorm = 0.5763, lr_0 = 4.2457e-04
Loss = 1.2465e-01, PNorm = 75.9285, GNorm = 0.8981, lr_0 = 4.2428e-04
Loss = 1.3623e-01, PNorm = 75.9396, GNorm = 0.8063, lr_0 = 4.2399e-04
Loss = 1.2897e-01, PNorm = 75.9532, GNorm = 0.8094, lr_0 = 4.2370e-04
Loss = 1.1471e-01, PNorm = 75.9678, GNorm = 0.7913, lr_0 = 4.2340e-04
Loss = 1.1812e-01, PNorm = 75.9787, GNorm = 0.6390, lr_0 = 4.2311e-04
Loss = 1.1631e-01, PNorm = 75.9920, GNorm = 0.7383, lr_0 = 4.2283e-04
Loss = 1.2563e-01, PNorm = 76.0041, GNorm = 0.7677, lr_0 = 4.2254e-04
Loss = 1.2797e-01, PNorm = 76.0180, GNorm = 0.8209, lr_0 = 4.2225e-04
Loss = 1.0697e-01, PNorm = 76.0273, GNorm = 0.8077, lr_0 = 4.2196e-04
Loss = 1.1839e-01, PNorm = 76.0402, GNorm = 0.5624, lr_0 = 4.2167e-04
Loss = 1.2341e-01, PNorm = 76.0466, GNorm = 0.5213, lr_0 = 4.2138e-04
Loss = 1.1519e-01, PNorm = 76.0564, GNorm = 0.5915, lr_0 = 4.2109e-04
Loss = 1.2034e-01, PNorm = 76.0674, GNorm = 0.7315, lr_0 = 4.2080e-04
Loss = 1.2234e-01, PNorm = 76.0811, GNorm = 0.5174, lr_0 = 4.2051e-04
Loss = 1.1414e-01, PNorm = 76.0901, GNorm = 0.4890, lr_0 = 4.2023e-04
Loss = 1.1351e-01, PNorm = 76.1018, GNorm = 0.6295, lr_0 = 4.1994e-04
Loss = 1.0313e-01, PNorm = 76.1154, GNorm = 0.5447, lr_0 = 4.1965e-04
Loss = 1.4011e-01, PNorm = 76.1294, GNorm = 0.8352, lr_0 = 4.1936e-04
Loss = 1.3914e-01, PNorm = 76.1436, GNorm = 0.5785, lr_0 = 4.1907e-04
Loss = 1.3049e-01, PNorm = 76.1556, GNorm = 0.8579, lr_0 = 4.1879e-04
Loss = 1.2386e-01, PNorm = 76.1702, GNorm = 0.7097, lr_0 = 4.1850e-04
Loss = 1.2977e-01, PNorm = 76.1784, GNorm = 0.9466, lr_0 = 4.1821e-04
Loss = 1.3888e-01, PNorm = 76.1937, GNorm = 0.9635, lr_0 = 4.1793e-04
Loss = 1.2732e-01, PNorm = 76.2063, GNorm = 1.0121, lr_0 = 4.1764e-04
Loss = 1.3376e-01, PNorm = 76.2170, GNorm = 0.7313, lr_0 = 4.1736e-04
Loss = 1.2593e-01, PNorm = 76.2245, GNorm = 0.6238, lr_0 = 4.1707e-04
Loss = 1.2751e-01, PNorm = 76.2328, GNorm = 0.7417, lr_0 = 4.1678e-04
Loss = 1.3029e-01, PNorm = 76.2426, GNorm = 0.6525, lr_0 = 4.1650e-04
Loss = 1.1940e-01, PNorm = 76.2592, GNorm = 0.5696, lr_0 = 4.1621e-04
Loss = 1.3612e-01, PNorm = 76.2641, GNorm = 0.7091, lr_0 = 4.1593e-04
Loss = 1.2287e-01, PNorm = 76.2689, GNorm = 0.6198, lr_0 = 4.1564e-04
Loss = 1.1740e-01, PNorm = 76.2786, GNorm = 0.6862, lr_0 = 4.1536e-04
Loss = 1.1857e-01, PNorm = 76.2918, GNorm = 0.5658, lr_0 = 4.1507e-04
Loss = 1.1582e-01, PNorm = 76.3018, GNorm = 0.7155, lr_0 = 4.1479e-04
Loss = 1.2207e-01, PNorm = 76.3135, GNorm = 0.5193, lr_0 = 4.1450e-04
Loss = 1.2211e-01, PNorm = 76.3247, GNorm = 0.6210, lr_0 = 4.1422e-04
Loss = 1.2094e-01, PNorm = 76.3307, GNorm = 0.7957, lr_0 = 4.1394e-04
Loss = 1.2701e-01, PNorm = 76.3473, GNorm = 0.6557, lr_0 = 4.1365e-04
Loss = 1.2284e-01, PNorm = 76.3555, GNorm = 0.7651, lr_0 = 4.1337e-04
Loss = 1.2663e-01, PNorm = 76.3702, GNorm = 0.7361, lr_0 = 4.1309e-04
Loss = 1.2715e-01, PNorm = 76.3815, GNorm = 1.2944, lr_0 = 4.1280e-04
Loss = 1.2574e-01, PNorm = 76.3918, GNorm = 0.6230, lr_0 = 4.1252e-04
Loss = 1.2371e-01, PNorm = 76.4015, GNorm = 0.8564, lr_0 = 4.1224e-04
Loss = 1.2649e-01, PNorm = 76.4114, GNorm = 0.8100, lr_0 = 4.1196e-04
Loss = 1.2398e-01, PNorm = 76.4221, GNorm = 1.5733, lr_0 = 4.1167e-04
Loss = 1.2006e-01, PNorm = 76.4249, GNorm = 0.8353, lr_0 = 4.1139e-04
Loss = 1.2434e-01, PNorm = 76.4384, GNorm = 1.1224, lr_0 = 4.1111e-04
Loss = 1.1765e-01, PNorm = 76.4422, GNorm = 0.7369, lr_0 = 4.1083e-04
Loss = 1.1723e-01, PNorm = 76.4520, GNorm = 0.8848, lr_0 = 4.1055e-04
Loss = 1.2539e-01, PNorm = 76.4596, GNorm = 0.7527, lr_0 = 4.1027e-04
Loss = 1.1926e-01, PNorm = 76.4756, GNorm = 1.2935, lr_0 = 4.0998e-04
Loss = 1.1566e-01, PNorm = 76.4950, GNorm = 0.6192, lr_0 = 4.0970e-04
Loss = 1.0469e-01, PNorm = 76.5068, GNorm = 0.6674, lr_0 = 4.0942e-04
Loss = 1.3366e-01, PNorm = 76.5129, GNorm = 0.7411, lr_0 = 4.0914e-04
Loss = 1.1323e-01, PNorm = 76.5200, GNorm = 0.7423, lr_0 = 4.0886e-04
Loss = 1.1836e-01, PNorm = 76.5250, GNorm = 0.4587, lr_0 = 4.0858e-04
Loss = 1.2235e-01, PNorm = 76.5370, GNorm = 0.6561, lr_0 = 4.0830e-04
Loss = 1.2156e-01, PNorm = 76.5461, GNorm = 0.7525, lr_0 = 4.0802e-04
Loss = 1.3184e-01, PNorm = 76.5599, GNorm = 0.7121, lr_0 = 4.0774e-04
Loss = 1.2411e-01, PNorm = 76.5715, GNorm = 0.5890, lr_0 = 4.0746e-04
Loss = 1.1663e-01, PNorm = 76.5828, GNorm = 1.1279, lr_0 = 4.0718e-04
Loss = 1.2517e-01, PNorm = 76.6017, GNorm = 0.7835, lr_0 = 4.0691e-04
Loss = 1.1304e-01, PNorm = 76.6113, GNorm = 0.4843, lr_0 = 4.0663e-04
Loss = 1.3785e-01, PNorm = 76.6211, GNorm = 0.8957, lr_0 = 4.0635e-04
Loss = 1.3085e-01, PNorm = 76.6365, GNorm = 0.6457, lr_0 = 4.0607e-04
Loss = 1.3252e-01, PNorm = 76.6499, GNorm = 0.9451, lr_0 = 4.0579e-04
Loss = 1.3220e-01, PNorm = 76.6544, GNorm = 0.8197, lr_0 = 4.0551e-04
Loss = 1.3395e-01, PNorm = 76.6663, GNorm = 0.5004, lr_0 = 4.0524e-04
Loss = 1.2778e-01, PNorm = 76.6765, GNorm = 0.8216, lr_0 = 4.0496e-04
Loss = 1.1855e-01, PNorm = 76.6909, GNorm = 0.5262, lr_0 = 4.0468e-04
Validation mae = 0.233000
Epoch 13
Loss = 1.2079e-01, PNorm = 76.6986, GNorm = 0.4739, lr_0 = 4.0440e-04
Loss = 1.2159e-01, PNorm = 76.7104, GNorm = 0.6450, lr_0 = 4.0413e-04
Loss = 1.1585e-01, PNorm = 76.7233, GNorm = 0.7214, lr_0 = 4.0385e-04
Loss = 1.1970e-01, PNorm = 76.7332, GNorm = 0.5613, lr_0 = 4.0357e-04
Loss = 1.0705e-01, PNorm = 76.7462, GNorm = 0.6456, lr_0 = 4.0330e-04
Loss = 1.1733e-01, PNorm = 76.7599, GNorm = 1.1621, lr_0 = 4.0302e-04
Loss = 1.2748e-01, PNorm = 76.7749, GNorm = 0.4658, lr_0 = 4.0274e-04
Loss = 9.6535e-02, PNorm = 76.7881, GNorm = 0.6587, lr_0 = 4.0247e-04
Loss = 1.1558e-01, PNorm = 76.7987, GNorm = 0.9763, lr_0 = 4.0219e-04
Loss = 1.2272e-01, PNorm = 76.8155, GNorm = 0.9818, lr_0 = 4.0192e-04
Loss = 1.3466e-01, PNorm = 76.8249, GNorm = 0.8888, lr_0 = 4.0164e-04
Loss = 1.2196e-01, PNorm = 76.8358, GNorm = 0.9254, lr_0 = 4.0137e-04
Loss = 1.1617e-01, PNorm = 76.8473, GNorm = 0.8554, lr_0 = 4.0109e-04
Loss = 1.0681e-01, PNorm = 76.8553, GNorm = 0.5879, lr_0 = 4.0082e-04
Loss = 1.3114e-01, PNorm = 76.8617, GNorm = 1.5493, lr_0 = 4.0054e-04
Loss = 1.0234e-01, PNorm = 76.8726, GNorm = 0.7134, lr_0 = 4.0027e-04
Loss = 1.0955e-01, PNorm = 76.8865, GNorm = 0.7920, lr_0 = 3.9999e-04
Loss = 1.0004e-01, PNorm = 76.8933, GNorm = 0.4363, lr_0 = 3.9972e-04
Loss = 1.1199e-01, PNorm = 76.9029, GNorm = 0.4748, lr_0 = 3.9945e-04
Loss = 1.1528e-01, PNorm = 76.9099, GNorm = 0.9514, lr_0 = 3.9917e-04
Loss = 1.1177e-01, PNorm = 76.9202, GNorm = 0.6111, lr_0 = 3.9890e-04
Loss = 1.1593e-01, PNorm = 76.9285, GNorm = 0.4658, lr_0 = 3.9863e-04
Loss = 1.1308e-01, PNorm = 76.9446, GNorm = 0.5440, lr_0 = 3.9835e-04
Loss = 1.1885e-01, PNorm = 76.9593, GNorm = 0.8068, lr_0 = 3.9808e-04
Loss = 1.2395e-01, PNorm = 76.9708, GNorm = 0.5840, lr_0 = 3.9781e-04
Loss = 1.1731e-01, PNorm = 76.9828, GNorm = 0.6838, lr_0 = 3.9753e-04
Loss = 1.1509e-01, PNorm = 76.9936, GNorm = 1.0903, lr_0 = 3.9726e-04
Loss = 1.3007e-01, PNorm = 76.9998, GNorm = 0.8256, lr_0 = 3.9699e-04
Loss = 1.2199e-01, PNorm = 77.0096, GNorm = 0.5715, lr_0 = 3.9672e-04
Loss = 1.2219e-01, PNorm = 77.0181, GNorm = 0.6563, lr_0 = 3.9645e-04
Loss = 1.0599e-01, PNorm = 77.0303, GNorm = 0.7465, lr_0 = 3.9617e-04
Loss = 1.1518e-01, PNorm = 77.0340, GNorm = 0.5852, lr_0 = 3.9590e-04
Loss = 1.0751e-01, PNorm = 77.0478, GNorm = 0.6205, lr_0 = 3.9563e-04
Loss = 1.1818e-01, PNorm = 77.0591, GNorm = 0.6595, lr_0 = 3.9536e-04
Loss = 1.0808e-01, PNorm = 77.0655, GNorm = 0.6891, lr_0 = 3.9509e-04
Loss = 1.1332e-01, PNorm = 77.0808, GNorm = 0.8174, lr_0 = 3.9482e-04
Loss = 1.1411e-01, PNorm = 77.0956, GNorm = 0.5910, lr_0 = 3.9455e-04
Loss = 1.2305e-01, PNorm = 77.1038, GNorm = 0.8572, lr_0 = 3.9428e-04
Loss = 1.2668e-01, PNorm = 77.1162, GNorm = 0.6427, lr_0 = 3.9401e-04
Loss = 1.3116e-01, PNorm = 77.1269, GNorm = 0.5406, lr_0 = 3.9374e-04
Loss = 1.2017e-01, PNorm = 77.1383, GNorm = 0.7430, lr_0 = 3.9347e-04
Loss = 1.1540e-01, PNorm = 77.1454, GNorm = 0.7561, lr_0 = 3.9320e-04
Loss = 1.0187e-01, PNorm = 77.1582, GNorm = 0.5695, lr_0 = 3.9293e-04
Loss = 1.0955e-01, PNorm = 77.1723, GNorm = 0.6525, lr_0 = 3.9266e-04
Loss = 1.1319e-01, PNorm = 77.1798, GNorm = 0.5097, lr_0 = 3.9239e-04
Loss = 1.2147e-01, PNorm = 77.1867, GNorm = 0.5704, lr_0 = 3.9212e-04
Loss = 1.1897e-01, PNorm = 77.1936, GNorm = 0.4664, lr_0 = 3.9185e-04
Loss = 9.8322e-02, PNorm = 77.1995, GNorm = 0.6153, lr_0 = 3.9159e-04
Loss = 1.2198e-01, PNorm = 77.2034, GNorm = 0.7311, lr_0 = 3.9132e-04
Loss = 1.4571e-01, PNorm = 77.2121, GNorm = 0.6837, lr_0 = 3.9105e-04
Loss = 1.1400e-01, PNorm = 77.2288, GNorm = 0.8013, lr_0 = 3.9078e-04
Loss = 1.2021e-01, PNorm = 77.2420, GNorm = 0.7722, lr_0 = 3.9051e-04
Loss = 1.1561e-01, PNorm = 77.2512, GNorm = 0.7764, lr_0 = 3.9025e-04
Loss = 1.2106e-01, PNorm = 77.2615, GNorm = 1.1627, lr_0 = 3.8998e-04
Loss = 1.2748e-01, PNorm = 77.2679, GNorm = 0.9657, lr_0 = 3.8971e-04
Loss = 1.3032e-01, PNorm = 77.2774, GNorm = 0.6294, lr_0 = 3.8945e-04
Loss = 1.2100e-01, PNorm = 77.2902, GNorm = 0.7360, lr_0 = 3.8918e-04
Loss = 1.1644e-01, PNorm = 77.3004, GNorm = 0.7014, lr_0 = 3.8891e-04
Loss = 1.1700e-01, PNorm = 77.3053, GNorm = 0.5020, lr_0 = 3.8865e-04
Loss = 1.0159e-01, PNorm = 77.3146, GNorm = 0.8113, lr_0 = 3.8838e-04
Loss = 1.1930e-01, PNorm = 77.3212, GNorm = 0.7396, lr_0 = 3.8811e-04
Loss = 1.1912e-01, PNorm = 77.3354, GNorm = 0.5105, lr_0 = 3.8785e-04
Loss = 1.2325e-01, PNorm = 77.3484, GNorm = 0.7806, lr_0 = 3.8758e-04
Loss = 1.0822e-01, PNorm = 77.3572, GNorm = 0.9116, lr_0 = 3.8732e-04
Loss = 1.2075e-01, PNorm = 77.3654, GNorm = 0.7688, lr_0 = 3.8705e-04
Loss = 1.0610e-01, PNorm = 77.3757, GNorm = 0.5193, lr_0 = 3.8679e-04
Loss = 1.1076e-01, PNorm = 77.3819, GNorm = 0.5669, lr_0 = 3.8652e-04
Loss = 1.1436e-01, PNorm = 77.3881, GNorm = 0.9104, lr_0 = 3.8626e-04
Loss = 1.1275e-01, PNorm = 77.3980, GNorm = 0.8108, lr_0 = 3.8599e-04
Loss = 1.2003e-01, PNorm = 77.4045, GNorm = 0.6331, lr_0 = 3.8573e-04
Loss = 1.1418e-01, PNorm = 77.4101, GNorm = 0.8665, lr_0 = 3.8546e-04
Loss = 1.2435e-01, PNorm = 77.4231, GNorm = 1.3446, lr_0 = 3.8520e-04
Loss = 1.1660e-01, PNorm = 77.4352, GNorm = 0.6665, lr_0 = 3.8493e-04
Loss = 1.2331e-01, PNorm = 77.4490, GNorm = 1.0653, lr_0 = 3.8467e-04
Loss = 1.3176e-01, PNorm = 77.4579, GNorm = 0.5300, lr_0 = 3.8441e-04
Loss = 1.0232e-01, PNorm = 77.4657, GNorm = 0.8927, lr_0 = 3.8414e-04
Loss = 1.0780e-01, PNorm = 77.4708, GNorm = 0.7075, lr_0 = 3.8388e-04
Loss = 1.1136e-01, PNorm = 77.4776, GNorm = 0.5531, lr_0 = 3.8362e-04
Loss = 1.1322e-01, PNorm = 77.4847, GNorm = 0.5782, lr_0 = 3.8336e-04
Loss = 1.1607e-01, PNorm = 77.4917, GNorm = 0.7810, lr_0 = 3.8309e-04
Loss = 1.0843e-01, PNorm = 77.4988, GNorm = 0.8806, lr_0 = 3.8283e-04
Loss = 1.1673e-01, PNorm = 77.5079, GNorm = 0.6479, lr_0 = 3.8257e-04
Loss = 1.4352e-01, PNorm = 77.5200, GNorm = 0.8017, lr_0 = 3.8231e-04
Loss = 1.0032e-01, PNorm = 77.5300, GNorm = 0.5435, lr_0 = 3.8204e-04
Loss = 1.0314e-01, PNorm = 77.5396, GNorm = 0.8184, lr_0 = 3.8178e-04
Loss = 1.0948e-01, PNorm = 77.5448, GNorm = 0.6564, lr_0 = 3.8152e-04
Loss = 1.0772e-01, PNorm = 77.5514, GNorm = 0.6466, lr_0 = 3.8126e-04
Loss = 1.1916e-01, PNorm = 77.5594, GNorm = 0.5871, lr_0 = 3.8100e-04
Loss = 1.1863e-01, PNorm = 77.5701, GNorm = 0.6797, lr_0 = 3.8074e-04
Loss = 1.1319e-01, PNorm = 77.5748, GNorm = 0.4820, lr_0 = 3.8048e-04
Loss = 1.2137e-01, PNorm = 77.5814, GNorm = 0.6320, lr_0 = 3.8022e-04
Loss = 1.1514e-01, PNorm = 77.5921, GNorm = 0.7139, lr_0 = 3.7995e-04
Loss = 1.3565e-01, PNorm = 77.6041, GNorm = 0.6703, lr_0 = 3.7969e-04
Loss = 1.3774e-01, PNorm = 77.6145, GNorm = 0.6663, lr_0 = 3.7943e-04
Loss = 1.1609e-01, PNorm = 77.6227, GNorm = 0.5940, lr_0 = 3.7917e-04
Loss = 1.2654e-01, PNorm = 77.6308, GNorm = 0.5067, lr_0 = 3.7891e-04
Loss = 1.1356e-01, PNorm = 77.6423, GNorm = 1.3800, lr_0 = 3.7866e-04
Loss = 1.1948e-01, PNorm = 77.6550, GNorm = 0.6352, lr_0 = 3.7840e-04
Loss = 1.1675e-01, PNorm = 77.6671, GNorm = 0.5193, lr_0 = 3.7814e-04
Loss = 1.1761e-01, PNorm = 77.6741, GNorm = 0.6174, lr_0 = 3.7788e-04
Loss = 1.1420e-01, PNorm = 77.6852, GNorm = 0.7033, lr_0 = 3.7762e-04
Loss = 1.4441e-01, PNorm = 77.6931, GNorm = 0.6272, lr_0 = 3.7736e-04
Loss = 1.1545e-01, PNorm = 77.7000, GNorm = 0.6670, lr_0 = 3.7710e-04
Loss = 1.1987e-01, PNorm = 77.7034, GNorm = 0.6731, lr_0 = 3.7684e-04
Loss = 1.1106e-01, PNorm = 77.7120, GNorm = 0.6963, lr_0 = 3.7659e-04
Loss = 1.2316e-01, PNorm = 77.7190, GNorm = 0.6974, lr_0 = 3.7633e-04
Loss = 1.2780e-01, PNorm = 77.7296, GNorm = 1.1009, lr_0 = 3.7607e-04
Loss = 1.2596e-01, PNorm = 77.7380, GNorm = 0.6619, lr_0 = 3.7581e-04
Loss = 1.1023e-01, PNorm = 77.7470, GNorm = 0.5784, lr_0 = 3.7555e-04
Loss = 1.1268e-01, PNorm = 77.7528, GNorm = 0.6949, lr_0 = 3.7530e-04
Loss = 1.1386e-01, PNorm = 77.7565, GNorm = 0.9503, lr_0 = 3.7504e-04
Loss = 1.4266e-01, PNorm = 77.7647, GNorm = 0.6945, lr_0 = 3.7478e-04
Loss = 1.3099e-01, PNorm = 77.7746, GNorm = 0.7642, lr_0 = 3.7453e-04
Loss = 1.1537e-01, PNorm = 77.7805, GNorm = 0.9556, lr_0 = 3.7427e-04
Loss = 1.0942e-01, PNorm = 77.7884, GNorm = 0.5308, lr_0 = 3.7401e-04
Loss = 1.3264e-01, PNorm = 77.8009, GNorm = 0.6870, lr_0 = 3.7376e-04
Loss = 1.1558e-01, PNorm = 77.8155, GNorm = 1.0878, lr_0 = 3.7350e-04
Loss = 1.3236e-01, PNorm = 77.8269, GNorm = 0.7363, lr_0 = 3.7325e-04
Loss = 1.1055e-01, PNorm = 77.8395, GNorm = 0.6254, lr_0 = 3.7299e-04
Loss = 1.2550e-01, PNorm = 77.8487, GNorm = 0.7889, lr_0 = 3.7273e-04
Validation mae = 0.234719
Epoch 14
Loss = 1.2138e-01, PNorm = 77.8655, GNorm = 0.6668, lr_0 = 3.7248e-04
Loss = 9.9641e-02, PNorm = 77.8781, GNorm = 0.7716, lr_0 = 3.7222e-04
Loss = 9.3381e-02, PNorm = 77.8863, GNorm = 0.5390, lr_0 = 3.7197e-04
Loss = 1.1037e-01, PNorm = 77.8963, GNorm = 0.7933, lr_0 = 3.7171e-04
Loss = 1.0332e-01, PNorm = 77.9092, GNorm = 1.0151, lr_0 = 3.7146e-04
Loss = 1.0585e-01, PNorm = 77.9190, GNorm = 0.5509, lr_0 = 3.7120e-04
Loss = 1.1265e-01, PNorm = 77.9312, GNorm = 0.6824, lr_0 = 3.7095e-04
Loss = 1.0614e-01, PNorm = 77.9408, GNorm = 0.7922, lr_0 = 3.7070e-04
Loss = 1.0407e-01, PNorm = 77.9442, GNorm = 0.6982, lr_0 = 3.7044e-04
Loss = 1.0067e-01, PNorm = 77.9547, GNorm = 0.6293, lr_0 = 3.7019e-04
Loss = 1.1217e-01, PNorm = 77.9614, GNorm = 0.8838, lr_0 = 3.6993e-04
Loss = 9.7044e-02, PNorm = 77.9714, GNorm = 0.8106, lr_0 = 3.6968e-04
Loss = 1.2280e-01, PNorm = 77.9811, GNorm = 0.8912, lr_0 = 3.6943e-04
Loss = 1.2494e-01, PNorm = 77.9924, GNorm = 0.6204, lr_0 = 3.6917e-04
Loss = 1.0096e-01, PNorm = 78.0056, GNorm = 0.8499, lr_0 = 3.6892e-04
Loss = 1.0056e-01, PNorm = 78.0130, GNorm = 0.4738, lr_0 = 3.6867e-04
Loss = 1.2448e-01, PNorm = 78.0213, GNorm = 0.6020, lr_0 = 3.6842e-04
Loss = 1.0959e-01, PNorm = 78.0300, GNorm = 0.9866, lr_0 = 3.6816e-04
Loss = 9.8698e-02, PNorm = 78.0384, GNorm = 0.5626, lr_0 = 3.6791e-04
Loss = 1.0970e-01, PNorm = 78.0489, GNorm = 0.9375, lr_0 = 3.6766e-04
Loss = 1.1908e-01, PNorm = 78.0559, GNorm = 0.9233, lr_0 = 3.6741e-04
Loss = 1.1078e-01, PNorm = 78.0639, GNorm = 0.5159, lr_0 = 3.6716e-04
Loss = 1.1285e-01, PNorm = 78.0753, GNorm = 0.6664, lr_0 = 3.6690e-04
Loss = 9.3320e-02, PNorm = 78.0862, GNorm = 0.6781, lr_0 = 3.6665e-04
Loss = 1.0204e-01, PNorm = 78.0969, GNorm = 0.4694, lr_0 = 3.6640e-04
Loss = 1.1506e-01, PNorm = 78.1054, GNorm = 0.6168, lr_0 = 3.6615e-04
Loss = 1.0707e-01, PNorm = 78.1199, GNorm = 0.5536, lr_0 = 3.6590e-04
Loss = 1.0636e-01, PNorm = 78.1274, GNorm = 0.7574, lr_0 = 3.6565e-04
Loss = 1.1160e-01, PNorm = 78.1306, GNorm = 0.9906, lr_0 = 3.6540e-04
Loss = 1.2349e-01, PNorm = 78.1357, GNorm = 0.7943, lr_0 = 3.6515e-04
Loss = 1.0444e-01, PNorm = 78.1373, GNorm = 0.6268, lr_0 = 3.6490e-04
Loss = 1.0648e-01, PNorm = 78.1453, GNorm = 0.4793, lr_0 = 3.6465e-04
Loss = 1.0556e-01, PNorm = 78.1574, GNorm = 0.5882, lr_0 = 3.6440e-04
Loss = 1.2241e-01, PNorm = 78.1660, GNorm = 1.0115, lr_0 = 3.6415e-04
Loss = 1.2684e-01, PNorm = 78.1799, GNorm = 0.5291, lr_0 = 3.6390e-04
Loss = 1.0343e-01, PNorm = 78.1922, GNorm = 1.1601, lr_0 = 3.6365e-04
Loss = 1.2647e-01, PNorm = 78.1960, GNorm = 1.1343, lr_0 = 3.6340e-04
Loss = 1.2748e-01, PNorm = 78.2085, GNorm = 0.8450, lr_0 = 3.6315e-04
Loss = 1.2092e-01, PNorm = 78.2198, GNorm = 0.6661, lr_0 = 3.6290e-04
Loss = 1.2181e-01, PNorm = 78.2344, GNorm = 0.7979, lr_0 = 3.6266e-04
Loss = 1.1512e-01, PNorm = 78.2529, GNorm = 1.5079, lr_0 = 3.6241e-04
Loss = 1.0902e-01, PNorm = 78.2656, GNorm = 0.5606, lr_0 = 3.6216e-04
Loss = 1.1312e-01, PNorm = 78.2763, GNorm = 0.7930, lr_0 = 3.6191e-04
Loss = 9.4246e-02, PNorm = 78.2843, GNorm = 0.7442, lr_0 = 3.6166e-04
Loss = 1.0396e-01, PNorm = 78.2925, GNorm = 0.6320, lr_0 = 3.6141e-04
Loss = 1.1933e-01, PNorm = 78.3024, GNorm = 0.9572, lr_0 = 3.6117e-04
Loss = 1.0868e-01, PNorm = 78.3170, GNorm = 0.5510, lr_0 = 3.6092e-04
Loss = 1.3478e-01, PNorm = 78.3227, GNorm = 1.0146, lr_0 = 3.6067e-04
Loss = 1.2297e-01, PNorm = 78.3281, GNorm = 0.5488, lr_0 = 3.6043e-04
Loss = 1.1852e-01, PNorm = 78.3376, GNorm = 0.7721, lr_0 = 3.6018e-04
Loss = 1.0736e-01, PNorm = 78.3536, GNorm = 0.5261, lr_0 = 3.5993e-04
Loss = 1.3525e-01, PNorm = 78.3669, GNorm = 0.8525, lr_0 = 3.5969e-04
Loss = 1.1883e-01, PNorm = 78.3780, GNorm = 0.5932, lr_0 = 3.5944e-04
Loss = 1.1127e-01, PNorm = 78.3868, GNorm = 0.5921, lr_0 = 3.5919e-04
Loss = 9.5651e-02, PNorm = 78.3957, GNorm = 0.5046, lr_0 = 3.5895e-04
Loss = 1.1996e-01, PNorm = 78.3995, GNorm = 1.1127, lr_0 = 3.5870e-04
Loss = 1.2362e-01, PNorm = 78.4050, GNorm = 0.9186, lr_0 = 3.5845e-04
Loss = 1.0148e-01, PNorm = 78.4124, GNorm = 1.0317, lr_0 = 3.5821e-04
Loss = 1.2098e-01, PNorm = 78.4185, GNorm = 0.5832, lr_0 = 3.5796e-04
Loss = 1.0351e-01, PNorm = 78.4273, GNorm = 1.1297, lr_0 = 3.5772e-04
Loss = 1.2617e-01, PNorm = 78.4344, GNorm = 0.8987, lr_0 = 3.5747e-04
Loss = 1.2068e-01, PNorm = 78.4448, GNorm = 0.8661, lr_0 = 3.5723e-04
Loss = 9.5371e-02, PNorm = 78.4503, GNorm = 0.5794, lr_0 = 3.5698e-04
Loss = 1.0655e-01, PNorm = 78.4549, GNorm = 0.9400, lr_0 = 3.5674e-04
Loss = 1.0000e-01, PNorm = 78.4601, GNorm = 0.4726, lr_0 = 3.5650e-04
Loss = 1.2859e-01, PNorm = 78.4648, GNorm = 0.5489, lr_0 = 3.5625e-04
Loss = 1.1953e-01, PNorm = 78.4779, GNorm = 0.6464, lr_0 = 3.5601e-04
Loss = 1.0250e-01, PNorm = 78.4827, GNorm = 0.7642, lr_0 = 3.5576e-04
Loss = 1.3825e-01, PNorm = 78.4878, GNorm = 0.7522, lr_0 = 3.5552e-04
Loss = 1.2130e-01, PNorm = 78.5007, GNorm = 0.5463, lr_0 = 3.5528e-04
Loss = 1.3051e-01, PNorm = 78.5107, GNorm = 0.5387, lr_0 = 3.5503e-04
Loss = 1.0798e-01, PNorm = 78.5235, GNorm = 0.5438, lr_0 = 3.5479e-04
Loss = 1.2219e-01, PNorm = 78.5280, GNorm = 0.6006, lr_0 = 3.5455e-04
Loss = 1.1224e-01, PNorm = 78.5335, GNorm = 0.5140, lr_0 = 3.5430e-04
Loss = 1.0857e-01, PNorm = 78.5409, GNorm = 1.1829, lr_0 = 3.5406e-04
Loss = 1.3830e-01, PNorm = 78.5462, GNorm = 0.6012, lr_0 = 3.5382e-04
Loss = 1.1586e-01, PNorm = 78.5579, GNorm = 0.8740, lr_0 = 3.5358e-04
Loss = 1.0308e-01, PNorm = 78.5660, GNorm = 0.6744, lr_0 = 3.5333e-04
Loss = 1.1375e-01, PNorm = 78.5787, GNorm = 0.5809, lr_0 = 3.5309e-04
Loss = 1.0946e-01, PNorm = 78.5849, GNorm = 0.4821, lr_0 = 3.5285e-04
Loss = 1.1914e-01, PNorm = 78.5937, GNorm = 0.6108, lr_0 = 3.5261e-04
Loss = 1.0865e-01, PNorm = 78.6019, GNorm = 0.5975, lr_0 = 3.5237e-04
Loss = 1.0769e-01, PNorm = 78.6080, GNorm = 0.5850, lr_0 = 3.5212e-04
Loss = 1.2305e-01, PNorm = 78.6164, GNorm = 0.5853, lr_0 = 3.5188e-04
Loss = 9.4224e-02, PNorm = 78.6224, GNorm = 0.3847, lr_0 = 3.5164e-04
Loss = 1.1013e-01, PNorm = 78.6278, GNorm = 0.6066, lr_0 = 3.5140e-04
Loss = 1.1181e-01, PNorm = 78.6367, GNorm = 0.8099, lr_0 = 3.5116e-04
Loss = 1.0482e-01, PNorm = 78.6416, GNorm = 0.5863, lr_0 = 3.5092e-04
Loss = 1.1028e-01, PNorm = 78.6520, GNorm = 0.6703, lr_0 = 3.5068e-04
Loss = 1.0450e-01, PNorm = 78.6588, GNorm = 1.4906, lr_0 = 3.5044e-04
Loss = 1.2211e-01, PNorm = 78.6660, GNorm = 0.5986, lr_0 = 3.5020e-04
Loss = 1.0807e-01, PNorm = 78.6764, GNorm = 0.5355, lr_0 = 3.4996e-04
Loss = 1.0372e-01, PNorm = 78.6815, GNorm = 0.7753, lr_0 = 3.4972e-04
Loss = 1.1402e-01, PNorm = 78.6856, GNorm = 0.7540, lr_0 = 3.4948e-04
Loss = 9.7939e-02, PNorm = 78.6915, GNorm = 0.5087, lr_0 = 3.4924e-04
Loss = 1.0848e-01, PNorm = 78.6930, GNorm = 0.6753, lr_0 = 3.4900e-04
Loss = 1.2001e-01, PNorm = 78.6955, GNorm = 0.8673, lr_0 = 3.4876e-04
Loss = 1.2123e-01, PNorm = 78.6986, GNorm = 0.8563, lr_0 = 3.4852e-04
Loss = 1.1155e-01, PNorm = 78.7060, GNorm = 0.4277, lr_0 = 3.4828e-04
Loss = 1.2272e-01, PNorm = 78.7138, GNorm = 0.6009, lr_0 = 3.4805e-04
Loss = 1.1680e-01, PNorm = 78.7255, GNorm = 0.7149, lr_0 = 3.4781e-04
Loss = 1.1705e-01, PNorm = 78.7378, GNorm = 0.6021, lr_0 = 3.4757e-04
Loss = 1.2107e-01, PNorm = 78.7463, GNorm = 1.1511, lr_0 = 3.4733e-04
Loss = 1.1628e-01, PNorm = 78.7534, GNorm = 0.7191, lr_0 = 3.4709e-04
Loss = 1.0990e-01, PNorm = 78.7574, GNorm = 0.4860, lr_0 = 3.4686e-04
Loss = 1.1918e-01, PNorm = 78.7618, GNorm = 0.7045, lr_0 = 3.4662e-04
Loss = 1.0629e-01, PNorm = 78.7734, GNorm = 0.6107, lr_0 = 3.4638e-04
Loss = 1.1674e-01, PNorm = 78.7871, GNorm = 0.8458, lr_0 = 3.4614e-04
Loss = 1.0600e-01, PNorm = 78.7963, GNorm = 0.6240, lr_0 = 3.4591e-04
Loss = 1.2636e-01, PNorm = 78.8047, GNorm = 1.0690, lr_0 = 3.4567e-04
Loss = 1.0774e-01, PNorm = 78.8123, GNorm = 0.6000, lr_0 = 3.4543e-04
Loss = 1.1512e-01, PNorm = 78.8157, GNorm = 0.8225, lr_0 = 3.4520e-04
Loss = 1.2398e-01, PNorm = 78.8249, GNorm = 1.5269, lr_0 = 3.4496e-04
Loss = 1.1352e-01, PNorm = 78.8317, GNorm = 0.6944, lr_0 = 3.4472e-04
Loss = 1.1508e-01, PNorm = 78.8363, GNorm = 0.6872, lr_0 = 3.4449e-04
Loss = 1.1863e-01, PNorm = 78.8443, GNorm = 0.7196, lr_0 = 3.4425e-04
Loss = 1.0835e-01, PNorm = 78.8582, GNorm = 0.7223, lr_0 = 3.4402e-04
Loss = 1.3190e-01, PNorm = 78.8664, GNorm = 0.7319, lr_0 = 3.4378e-04
Loss = 1.0331e-01, PNorm = 78.8721, GNorm = 0.7870, lr_0 = 3.4354e-04
Loss = 1.1456e-01, PNorm = 78.8814, GNorm = 0.6997, lr_0 = 3.4331e-04
Validation mae = 0.231707
Epoch 15
Loss = 1.1024e-01, PNorm = 78.8913, GNorm = 0.8365, lr_0 = 3.4307e-04
Loss = 1.0545e-01, PNorm = 78.8977, GNorm = 0.6262, lr_0 = 3.4284e-04
Loss = 1.1361e-01, PNorm = 78.9054, GNorm = 0.9871, lr_0 = 3.4260e-04
Loss = 1.0198e-01, PNorm = 78.9147, GNorm = 0.6086, lr_0 = 3.4237e-04
Loss = 9.8131e-02, PNorm = 78.9256, GNorm = 0.6289, lr_0 = 3.4213e-04
Loss = 1.0035e-01, PNorm = 78.9336, GNorm = 1.6970, lr_0 = 3.4190e-04
Loss = 9.7002e-02, PNorm = 78.9406, GNorm = 0.6839, lr_0 = 3.4167e-04
Loss = 1.0376e-01, PNorm = 78.9523, GNorm = 0.6744, lr_0 = 3.4143e-04
Loss = 1.1830e-01, PNorm = 78.9659, GNorm = 0.6831, lr_0 = 3.4120e-04
Loss = 1.1344e-01, PNorm = 78.9759, GNorm = 0.5946, lr_0 = 3.4096e-04
Loss = 1.0085e-01, PNorm = 78.9836, GNorm = 0.6016, lr_0 = 3.4073e-04
Loss = 9.4754e-02, PNorm = 78.9974, GNorm = 0.7672, lr_0 = 3.4050e-04
Loss = 1.1589e-01, PNorm = 79.0098, GNorm = 0.6682, lr_0 = 3.4026e-04
Loss = 1.0287e-01, PNorm = 79.0165, GNorm = 0.5416, lr_0 = 3.4003e-04
Loss = 1.0693e-01, PNorm = 79.0275, GNorm = 0.6288, lr_0 = 3.3980e-04
Loss = 9.2932e-02, PNorm = 79.0361, GNorm = 0.6153, lr_0 = 3.3956e-04
Loss = 1.0368e-01, PNorm = 79.0466, GNorm = 0.5119, lr_0 = 3.3933e-04
Loss = 1.0857e-01, PNorm = 79.0565, GNorm = 0.8644, lr_0 = 3.3910e-04
Loss = 9.9048e-02, PNorm = 79.0644, GNorm = 0.7843, lr_0 = 3.3887e-04
Loss = 1.0093e-01, PNorm = 79.0711, GNorm = 0.7362, lr_0 = 3.3864e-04
Loss = 1.0207e-01, PNorm = 79.0805, GNorm = 0.8833, lr_0 = 3.3840e-04
Loss = 9.7488e-02, PNorm = 79.0864, GNorm = 0.5179, lr_0 = 3.3817e-04
Loss = 1.0788e-01, PNorm = 79.0957, GNorm = 0.5741, lr_0 = 3.3794e-04
Loss = 1.0983e-01, PNorm = 79.1064, GNorm = 0.6230, lr_0 = 3.3771e-04
Loss = 1.2037e-01, PNorm = 79.1111, GNorm = 0.6159, lr_0 = 3.3748e-04
Loss = 1.1528e-01, PNorm = 79.1188, GNorm = 0.8862, lr_0 = 3.3725e-04
Loss = 1.0557e-01, PNorm = 79.1309, GNorm = 0.4715, lr_0 = 3.3701e-04
Loss = 1.0722e-01, PNorm = 79.1388, GNorm = 0.9819, lr_0 = 3.3678e-04
Loss = 1.0276e-01, PNorm = 79.1480, GNorm = 0.6566, lr_0 = 3.3655e-04
Loss = 1.0476e-01, PNorm = 79.1559, GNorm = 0.6865, lr_0 = 3.3632e-04
Loss = 1.1295e-01, PNorm = 79.1648, GNorm = 0.7348, lr_0 = 3.3609e-04
Loss = 1.0434e-01, PNorm = 79.1710, GNorm = 0.8747, lr_0 = 3.3586e-04
Loss = 9.1584e-02, PNorm = 79.1754, GNorm = 0.5739, lr_0 = 3.3563e-04
Loss = 9.6511e-02, PNorm = 79.1834, GNorm = 0.6465, lr_0 = 3.3540e-04
Loss = 1.0209e-01, PNorm = 79.1922, GNorm = 0.7073, lr_0 = 3.3517e-04
Loss = 1.0670e-01, PNorm = 79.1994, GNorm = 0.8215, lr_0 = 3.3494e-04
Loss = 1.3056e-01, PNorm = 79.2061, GNorm = 0.6218, lr_0 = 3.3471e-04
Loss = 1.1817e-01, PNorm = 79.2145, GNorm = 0.6916, lr_0 = 3.3448e-04
Loss = 9.2364e-02, PNorm = 79.2258, GNorm = 0.6069, lr_0 = 3.3425e-04
Loss = 9.2926e-02, PNorm = 79.2321, GNorm = 0.8191, lr_0 = 3.3403e-04
Loss = 1.1226e-01, PNorm = 79.2365, GNorm = 0.6188, lr_0 = 3.3380e-04
Loss = 1.2713e-01, PNorm = 79.2376, GNorm = 0.7438, lr_0 = 3.3357e-04
Loss = 9.6450e-02, PNorm = 79.2470, GNorm = 0.7919, lr_0 = 3.3334e-04
Loss = 1.2360e-01, PNorm = 79.2647, GNorm = 0.7169, lr_0 = 3.3311e-04
Loss = 1.0252e-01, PNorm = 79.2747, GNorm = 0.5074, lr_0 = 3.3288e-04
Loss = 1.0645e-01, PNorm = 79.2869, GNorm = 0.6415, lr_0 = 3.3265e-04
Loss = 1.0368e-01, PNorm = 79.2894, GNorm = 0.8804, lr_0 = 3.3243e-04
Loss = 9.9450e-02, PNorm = 79.2972, GNorm = 0.7247, lr_0 = 3.3220e-04
Loss = 1.0157e-01, PNorm = 79.3089, GNorm = 0.5383, lr_0 = 3.3197e-04
Loss = 9.6811e-02, PNorm = 79.3198, GNorm = 0.5614, lr_0 = 3.3174e-04
Loss = 9.6753e-02, PNorm = 79.3272, GNorm = 0.6043, lr_0 = 3.3152e-04
Loss = 9.2845e-02, PNorm = 79.3327, GNorm = 0.9623, lr_0 = 3.3129e-04
Loss = 1.2137e-01, PNorm = 79.3416, GNorm = 0.7433, lr_0 = 3.3106e-04
Loss = 1.0663e-01, PNorm = 79.3521, GNorm = 0.6937, lr_0 = 3.3084e-04
Loss = 1.1150e-01, PNorm = 79.3558, GNorm = 0.7728, lr_0 = 3.3061e-04
Loss = 1.1349e-01, PNorm = 79.3630, GNorm = 0.7944, lr_0 = 3.3038e-04
Loss = 9.3164e-02, PNorm = 79.3692, GNorm = 0.6189, lr_0 = 3.3016e-04
Loss = 1.0949e-01, PNorm = 79.3748, GNorm = 0.6523, lr_0 = 3.2993e-04
Loss = 1.0653e-01, PNorm = 79.3848, GNorm = 0.6795, lr_0 = 3.2970e-04
Loss = 1.0838e-01, PNorm = 79.3903, GNorm = 0.8462, lr_0 = 3.2948e-04
Loss = 1.2073e-01, PNorm = 79.3962, GNorm = 0.9704, lr_0 = 3.2925e-04
Loss = 9.9338e-02, PNorm = 79.4024, GNorm = 0.9700, lr_0 = 3.2903e-04
Loss = 9.8963e-02, PNorm = 79.4069, GNorm = 0.3995, lr_0 = 3.2880e-04
Loss = 1.0377e-01, PNorm = 79.4169, GNorm = 0.5899, lr_0 = 3.2858e-04
Loss = 1.0679e-01, PNorm = 79.4246, GNorm = 0.7412, lr_0 = 3.2835e-04
Loss = 1.1716e-01, PNorm = 79.4311, GNorm = 0.7221, lr_0 = 3.2813e-04
Loss = 1.0316e-01, PNorm = 79.4420, GNorm = 0.6471, lr_0 = 3.2790e-04
Loss = 1.1707e-01, PNorm = 79.4482, GNorm = 0.9275, lr_0 = 3.2768e-04
Loss = 9.7876e-02, PNorm = 79.4556, GNorm = 0.6588, lr_0 = 3.2745e-04
Loss = 1.1380e-01, PNorm = 79.4601, GNorm = 0.8114, lr_0 = 3.2723e-04
Loss = 1.0748e-01, PNorm = 79.4663, GNorm = 0.8181, lr_0 = 3.2700e-04
Loss = 1.2867e-01, PNorm = 79.4732, GNorm = 0.9244, lr_0 = 3.2678e-04
Loss = 1.0689e-01, PNorm = 79.4801, GNorm = 0.7810, lr_0 = 3.2656e-04
Loss = 1.1136e-01, PNorm = 79.4893, GNorm = 0.8046, lr_0 = 3.2633e-04
Loss = 1.0745e-01, PNorm = 79.4989, GNorm = 0.7057, lr_0 = 3.2611e-04
Loss = 1.1156e-01, PNorm = 79.5104, GNorm = 0.6041, lr_0 = 3.2589e-04
Loss = 1.1456e-01, PNorm = 79.5161, GNorm = 0.6338, lr_0 = 3.2566e-04
Loss = 1.1019e-01, PNorm = 79.5245, GNorm = 0.5677, lr_0 = 3.2544e-04
Loss = 1.0167e-01, PNorm = 79.5311, GNorm = 0.7692, lr_0 = 3.2522e-04
Loss = 1.0313e-01, PNorm = 79.5369, GNorm = 0.4650, lr_0 = 3.2499e-04
Loss = 9.9509e-02, PNorm = 79.5478, GNorm = 0.7467, lr_0 = 3.2477e-04
Loss = 8.5163e-02, PNorm = 79.5541, GNorm = 0.5230, lr_0 = 3.2455e-04
Loss = 1.0939e-01, PNorm = 79.5614, GNorm = 0.6052, lr_0 = 3.2433e-04
Loss = 1.1749e-01, PNorm = 79.5664, GNorm = 0.5194, lr_0 = 3.2410e-04
Loss = 1.0999e-01, PNorm = 79.5692, GNorm = 0.6117, lr_0 = 3.2388e-04
Loss = 1.0365e-01, PNorm = 79.5759, GNorm = 0.6746, lr_0 = 3.2366e-04
Loss = 1.1093e-01, PNorm = 79.5839, GNorm = 0.5822, lr_0 = 3.2344e-04
Loss = 1.0521e-01, PNorm = 79.5949, GNorm = 0.6711, lr_0 = 3.2322e-04
Loss = 1.0479e-01, PNorm = 79.6028, GNorm = 0.4537, lr_0 = 3.2300e-04
Loss = 1.0071e-01, PNorm = 79.6086, GNorm = 0.7056, lr_0 = 3.2277e-04
Loss = 1.2660e-01, PNorm = 79.6134, GNorm = 0.8479, lr_0 = 3.2255e-04
Loss = 1.0524e-01, PNorm = 79.6176, GNorm = 0.5965, lr_0 = 3.2233e-04
Loss = 1.1376e-01, PNorm = 79.6236, GNorm = 1.0759, lr_0 = 3.2211e-04
Loss = 1.1346e-01, PNorm = 79.6242, GNorm = 0.7208, lr_0 = 3.2189e-04
Loss = 1.0490e-01, PNorm = 79.6278, GNorm = 0.8340, lr_0 = 3.2167e-04
Loss = 1.0773e-01, PNorm = 79.6383, GNorm = 0.8899, lr_0 = 3.2145e-04
Loss = 1.0700e-01, PNorm = 79.6484, GNorm = 0.8179, lr_0 = 3.2123e-04
Loss = 1.2017e-01, PNorm = 79.6564, GNorm = 0.6992, lr_0 = 3.2101e-04
Loss = 1.3272e-01, PNorm = 79.6619, GNorm = 0.6605, lr_0 = 3.2079e-04
Loss = 1.1591e-01, PNorm = 79.6661, GNorm = 0.9012, lr_0 = 3.2057e-04
Loss = 1.1542e-01, PNorm = 79.6754, GNorm = 0.7053, lr_0 = 3.2035e-04
Loss = 1.0943e-01, PNorm = 79.6827, GNorm = 0.6002, lr_0 = 3.2013e-04
Loss = 1.1682e-01, PNorm = 79.6915, GNorm = 0.7226, lr_0 = 3.1991e-04
Loss = 1.1828e-01, PNorm = 79.7002, GNorm = 0.8943, lr_0 = 3.1969e-04
Loss = 1.0582e-01, PNorm = 79.7095, GNorm = 0.7504, lr_0 = 3.1947e-04
Loss = 9.8942e-02, PNorm = 79.7179, GNorm = 0.9204, lr_0 = 3.1925e-04
Loss = 1.2103e-01, PNorm = 79.7271, GNorm = 0.6521, lr_0 = 3.1904e-04
Loss = 1.2174e-01, PNorm = 79.7277, GNorm = 0.6882, lr_0 = 3.1882e-04
Loss = 9.8112e-02, PNorm = 79.7298, GNorm = 0.6527, lr_0 = 3.1860e-04
Loss = 1.1036e-01, PNorm = 79.7355, GNorm = 0.9409, lr_0 = 3.1838e-04
Loss = 9.5884e-02, PNorm = 79.7440, GNorm = 0.4946, lr_0 = 3.1816e-04
Loss = 1.2077e-01, PNorm = 79.7506, GNorm = 0.9612, lr_0 = 3.1794e-04
Loss = 1.0268e-01, PNorm = 79.7563, GNorm = 0.5175, lr_0 = 3.1773e-04
Loss = 9.8832e-02, PNorm = 79.7616, GNorm = 0.6405, lr_0 = 3.1751e-04
Loss = 1.1386e-01, PNorm = 79.7684, GNorm = 0.6658, lr_0 = 3.1729e-04
Loss = 1.0822e-01, PNorm = 79.7791, GNorm = 0.5712, lr_0 = 3.1707e-04
Loss = 1.0796e-01, PNorm = 79.7839, GNorm = 0.5871, lr_0 = 3.1686e-04
Loss = 1.0955e-01, PNorm = 79.7946, GNorm = 0.6753, lr_0 = 3.1664e-04
Loss = 1.5124e-01, PNorm = 79.8074, GNorm = 0.4294, lr_0 = 3.1642e-04
Loss = 1.0470e-01, PNorm = 79.8193, GNorm = 0.7501, lr_0 = 3.1621e-04
Validation mae = 0.235524
Epoch 16
Loss = 9.3356e-02, PNorm = 79.8258, GNorm = 0.5470, lr_0 = 3.1599e-04
Loss = 9.6500e-02, PNorm = 79.8333, GNorm = 0.6602, lr_0 = 3.1577e-04
Loss = 1.0914e-01, PNorm = 79.8390, GNorm = 0.6757, lr_0 = 3.1556e-04
Loss = 9.4642e-02, PNorm = 79.8445, GNorm = 0.4604, lr_0 = 3.1534e-04
Loss = 1.1628e-01, PNorm = 79.8538, GNorm = 0.6145, lr_0 = 3.1512e-04
Loss = 1.0369e-01, PNorm = 79.8636, GNorm = 0.5018, lr_0 = 3.1491e-04
Loss = 9.1349e-02, PNorm = 79.8747, GNorm = 0.6731, lr_0 = 3.1469e-04
Loss = 1.0104e-01, PNorm = 79.8781, GNorm = 0.6466, lr_0 = 3.1448e-04
Loss = 1.0121e-01, PNorm = 79.8868, GNorm = 0.5576, lr_0 = 3.1426e-04
Loss = 1.1231e-01, PNorm = 79.8981, GNorm = 0.8662, lr_0 = 3.1405e-04
Loss = 9.4076e-02, PNorm = 79.9035, GNorm = 0.4746, lr_0 = 3.1383e-04
Loss = 1.0369e-01, PNorm = 79.9116, GNorm = 0.6232, lr_0 = 3.1362e-04
Loss = 1.0159e-01, PNorm = 79.9220, GNorm = 0.7701, lr_0 = 3.1340e-04
Loss = 9.7890e-02, PNorm = 79.9284, GNorm = 0.5291, lr_0 = 3.1319e-04
Loss = 1.0684e-01, PNorm = 79.9339, GNorm = 1.0827, lr_0 = 3.1297e-04
Loss = 1.0370e-01, PNorm = 79.9368, GNorm = 0.8623, lr_0 = 3.1276e-04
Loss = 1.1198e-01, PNorm = 79.9460, GNorm = 0.7901, lr_0 = 3.1254e-04
Loss = 9.9141e-02, PNorm = 79.9562, GNorm = 0.6625, lr_0 = 3.1233e-04
Loss = 9.7250e-02, PNorm = 79.9625, GNorm = 0.7884, lr_0 = 3.1212e-04
Loss = 1.0910e-01, PNorm = 79.9696, GNorm = 0.7659, lr_0 = 3.1190e-04
Loss = 1.0236e-01, PNorm = 79.9715, GNorm = 0.8096, lr_0 = 3.1169e-04
Loss = 1.2018e-01, PNorm = 79.9783, GNorm = 0.7235, lr_0 = 3.1147e-04
Loss = 1.0014e-01, PNorm = 79.9851, GNorm = 0.6563, lr_0 = 3.1126e-04
Loss = 1.0601e-01, PNorm = 79.9905, GNorm = 0.6148, lr_0 = 3.1105e-04
Loss = 1.1031e-01, PNorm = 79.9961, GNorm = 0.7745, lr_0 = 3.1083e-04
Loss = 1.0675e-01, PNorm = 80.0048, GNorm = 0.7105, lr_0 = 3.1062e-04
Loss = 9.3333e-02, PNorm = 80.0105, GNorm = 0.7548, lr_0 = 3.1041e-04
Loss = 8.7264e-02, PNorm = 80.0192, GNorm = 0.5656, lr_0 = 3.1020e-04
Loss = 1.0394e-01, PNorm = 80.0270, GNorm = 0.5272, lr_0 = 3.0998e-04
Loss = 1.1785e-01, PNorm = 80.0321, GNorm = 1.3277, lr_0 = 3.0977e-04
Loss = 9.5388e-02, PNorm = 80.0382, GNorm = 0.5269, lr_0 = 3.0956e-04
Loss = 9.7795e-02, PNorm = 80.0448, GNorm = 0.6704, lr_0 = 3.0935e-04
Loss = 1.0107e-01, PNorm = 80.0511, GNorm = 0.8267, lr_0 = 3.0914e-04
Loss = 1.0240e-01, PNorm = 80.0586, GNorm = 0.5215, lr_0 = 3.0892e-04
Loss = 9.5212e-02, PNorm = 80.0664, GNorm = 0.6883, lr_0 = 3.0871e-04
Loss = 9.5799e-02, PNorm = 80.0736, GNorm = 0.6408, lr_0 = 3.0850e-04
Loss = 1.1878e-01, PNorm = 80.0771, GNorm = 0.7012, lr_0 = 3.0829e-04
Loss = 8.9207e-02, PNorm = 80.0821, GNorm = 0.6154, lr_0 = 3.0808e-04
Loss = 1.1883e-01, PNorm = 80.0908, GNorm = 0.7535, lr_0 = 3.0787e-04
Loss = 1.1057e-01, PNorm = 80.0972, GNorm = 0.5683, lr_0 = 3.0766e-04
Loss = 9.9469e-02, PNorm = 80.1071, GNorm = 0.5989, lr_0 = 3.0745e-04
Loss = 1.1847e-01, PNorm = 80.1133, GNorm = 0.8376, lr_0 = 3.0723e-04
Loss = 1.0698e-01, PNorm = 80.1201, GNorm = 0.5352, lr_0 = 3.0702e-04
Loss = 1.0045e-01, PNorm = 80.1257, GNorm = 0.8912, lr_0 = 3.0681e-04
Loss = 1.0078e-01, PNorm = 80.1283, GNorm = 0.6477, lr_0 = 3.0660e-04
Loss = 1.0631e-01, PNorm = 80.1362, GNorm = 0.7288, lr_0 = 3.0639e-04
Loss = 1.0012e-01, PNorm = 80.1419, GNorm = 0.6417, lr_0 = 3.0618e-04
Loss = 1.0678e-01, PNorm = 80.1445, GNorm = 0.5644, lr_0 = 3.0597e-04
Loss = 9.9557e-02, PNorm = 80.1534, GNorm = 0.8063, lr_0 = 3.0576e-04
Loss = 9.8931e-02, PNorm = 80.1608, GNorm = 0.6611, lr_0 = 3.0555e-04
Loss = 1.0251e-01, PNorm = 80.1650, GNorm = 0.5893, lr_0 = 3.0535e-04
Loss = 1.0126e-01, PNorm = 80.1687, GNorm = 0.5425, lr_0 = 3.0514e-04
Loss = 8.9419e-02, PNorm = 80.1774, GNorm = 0.5982, lr_0 = 3.0493e-04
Loss = 1.0794e-01, PNorm = 80.1850, GNorm = 0.6167, lr_0 = 3.0472e-04
Loss = 9.8895e-02, PNorm = 80.1957, GNorm = 0.9360, lr_0 = 3.0451e-04
Loss = 1.0681e-01, PNorm = 80.2028, GNorm = 0.6520, lr_0 = 3.0430e-04
Loss = 9.7785e-02, PNorm = 80.2102, GNorm = 0.5942, lr_0 = 3.0409e-04
Loss = 9.4793e-02, PNorm = 80.2192, GNorm = 1.0737, lr_0 = 3.0388e-04
Loss = 1.0622e-01, PNorm = 80.2228, GNorm = 0.6626, lr_0 = 3.0368e-04
Loss = 9.6508e-02, PNorm = 80.2263, GNorm = 0.6878, lr_0 = 3.0347e-04
Loss = 9.9818e-02, PNorm = 80.2326, GNorm = 0.9006, lr_0 = 3.0326e-04
Loss = 9.9309e-02, PNorm = 80.2400, GNorm = 0.6610, lr_0 = 3.0305e-04
Loss = 1.0708e-01, PNorm = 80.2468, GNorm = 0.8340, lr_0 = 3.0284e-04
Loss = 1.2748e-01, PNorm = 80.2495, GNorm = 0.7986, lr_0 = 3.0264e-04
Loss = 1.1879e-01, PNorm = 80.2561, GNorm = 0.6560, lr_0 = 3.0243e-04
Loss = 1.0608e-01, PNorm = 80.2624, GNorm = 0.7015, lr_0 = 3.0222e-04
Loss = 1.0253e-01, PNorm = 80.2706, GNorm = 0.6586, lr_0 = 3.0202e-04
Loss = 9.6321e-02, PNorm = 80.2756, GNorm = 0.7563, lr_0 = 3.0181e-04
Loss = 1.1231e-01, PNorm = 80.2792, GNorm = 0.6612, lr_0 = 3.0160e-04
Loss = 1.1259e-01, PNorm = 80.2855, GNorm = 0.7261, lr_0 = 3.0140e-04
Loss = 1.1496e-01, PNorm = 80.2900, GNorm = 0.6907, lr_0 = 3.0119e-04
Loss = 1.0595e-01, PNorm = 80.2935, GNorm = 0.7038, lr_0 = 3.0098e-04
Loss = 1.1247e-01, PNorm = 80.3024, GNorm = 0.6532, lr_0 = 3.0078e-04
Loss = 1.0826e-01, PNorm = 80.3080, GNorm = 0.6086, lr_0 = 3.0057e-04
Loss = 1.1416e-01, PNorm = 80.3140, GNorm = 0.6129, lr_0 = 3.0036e-04
Loss = 1.2165e-01, PNorm = 80.3204, GNorm = 0.8134, lr_0 = 3.0016e-04
Loss = 1.2431e-01, PNorm = 80.3288, GNorm = 0.5957, lr_0 = 2.9995e-04
Loss = 9.1503e-02, PNorm = 80.3362, GNorm = 0.6590, lr_0 = 2.9975e-04
Loss = 1.0156e-01, PNorm = 80.3396, GNorm = 0.5813, lr_0 = 2.9954e-04
Loss = 1.0792e-01, PNorm = 80.3448, GNorm = 0.7417, lr_0 = 2.9934e-04
Loss = 1.1621e-01, PNorm = 80.3521, GNorm = 0.8315, lr_0 = 2.9913e-04
Loss = 1.0947e-01, PNorm = 80.3582, GNorm = 0.6245, lr_0 = 2.9893e-04
Loss = 1.0517e-01, PNorm = 80.3650, GNorm = 0.9179, lr_0 = 2.9872e-04
Loss = 9.4240e-02, PNorm = 80.3714, GNorm = 0.5860, lr_0 = 2.9852e-04
Loss = 1.0840e-01, PNorm = 80.3787, GNorm = 0.6511, lr_0 = 2.9831e-04
Loss = 1.0770e-01, PNorm = 80.3835, GNorm = 0.5755, lr_0 = 2.9811e-04
Loss = 1.1495e-01, PNorm = 80.3904, GNorm = 0.8110, lr_0 = 2.9790e-04
Loss = 9.6775e-02, PNorm = 80.3951, GNorm = 0.6562, lr_0 = 2.9770e-04
Loss = 1.0954e-01, PNorm = 80.4005, GNorm = 0.8445, lr_0 = 2.9750e-04
Loss = 1.0402e-01, PNorm = 80.4044, GNorm = 0.7070, lr_0 = 2.9729e-04
Loss = 9.4136e-02, PNorm = 80.4102, GNorm = 0.6171, lr_0 = 2.9709e-04
Loss = 1.0911e-01, PNorm = 80.4182, GNorm = 0.6956, lr_0 = 2.9689e-04
Loss = 1.1124e-01, PNorm = 80.4238, GNorm = 1.1900, lr_0 = 2.9668e-04
Loss = 9.7861e-02, PNorm = 80.4320, GNorm = 0.8145, lr_0 = 2.9648e-04
Loss = 1.1264e-01, PNorm = 80.4390, GNorm = 0.5723, lr_0 = 2.9628e-04
Loss = 1.0442e-01, PNorm = 80.4489, GNorm = 0.6257, lr_0 = 2.9607e-04
Loss = 9.8231e-02, PNorm = 80.4562, GNorm = 0.6958, lr_0 = 2.9587e-04
Loss = 1.0599e-01, PNorm = 80.4634, GNorm = 0.6231, lr_0 = 2.9567e-04
Loss = 1.1759e-01, PNorm = 80.4742, GNorm = 0.5101, lr_0 = 2.9546e-04
Loss = 1.0272e-01, PNorm = 80.4802, GNorm = 0.5838, lr_0 = 2.9526e-04
Loss = 1.0703e-01, PNorm = 80.4879, GNorm = 0.9037, lr_0 = 2.9506e-04
Loss = 1.0535e-01, PNorm = 80.4992, GNorm = 0.8697, lr_0 = 2.9486e-04
Loss = 1.0711e-01, PNorm = 80.5041, GNorm = 0.6117, lr_0 = 2.9466e-04
Loss = 9.3495e-02, PNorm = 80.5079, GNorm = 0.5949, lr_0 = 2.9445e-04
Loss = 1.0773e-01, PNorm = 80.5123, GNorm = 0.9672, lr_0 = 2.9425e-04
Loss = 1.0243e-01, PNorm = 80.5184, GNorm = 0.5765, lr_0 = 2.9405e-04
Loss = 1.0694e-01, PNorm = 80.5250, GNorm = 0.7650, lr_0 = 2.9385e-04
Loss = 9.6217e-02, PNorm = 80.5297, GNorm = 0.5164, lr_0 = 2.9365e-04
Loss = 1.0686e-01, PNorm = 80.5388, GNorm = 0.9618, lr_0 = 2.9345e-04
Loss = 9.8280e-02, PNorm = 80.5403, GNorm = 0.4931, lr_0 = 2.9325e-04
Loss = 1.0247e-01, PNorm = 80.5448, GNorm = 1.0234, lr_0 = 2.9305e-04
Loss = 1.0599e-01, PNorm = 80.5504, GNorm = 0.5605, lr_0 = 2.9284e-04
Loss = 1.0010e-01, PNorm = 80.5577, GNorm = 0.6664, lr_0 = 2.9264e-04
Loss = 9.8497e-02, PNorm = 80.5623, GNorm = 0.8186, lr_0 = 2.9244e-04
Loss = 1.1105e-01, PNorm = 80.5659, GNorm = 0.7827, lr_0 = 2.9224e-04
Loss = 1.0863e-01, PNorm = 80.5737, GNorm = 0.6856, lr_0 = 2.9204e-04
Loss = 1.1390e-01, PNorm = 80.5781, GNorm = 0.6121, lr_0 = 2.9184e-04
Loss = 1.0324e-01, PNorm = 80.5841, GNorm = 0.4396, lr_0 = 2.9164e-04
Loss = 1.1585e-01, PNorm = 80.5909, GNorm = 0.5065, lr_0 = 2.9144e-04
Loss = 1.0544e-01, PNorm = 80.6011, GNorm = 0.5631, lr_0 = 2.9124e-04
Validation mae = 0.227112
Epoch 17
Loss = 1.0024e-01, PNorm = 80.6068, GNorm = 0.8170, lr_0 = 2.9104e-04
Loss = 9.8108e-02, PNorm = 80.6161, GNorm = 0.6453, lr_0 = 2.9084e-04
Loss = 1.0165e-01, PNorm = 80.6239, GNorm = 0.8411, lr_0 = 2.9065e-04
Loss = 9.2409e-02, PNorm = 80.6336, GNorm = 0.9423, lr_0 = 2.9045e-04
Loss = 8.9211e-02, PNorm = 80.6395, GNorm = 0.7161, lr_0 = 2.9025e-04
Loss = 9.1687e-02, PNorm = 80.6432, GNorm = 0.7782, lr_0 = 2.9005e-04
Loss = 9.2511e-02, PNorm = 80.6471, GNorm = 0.7611, lr_0 = 2.8985e-04
Loss = 9.5501e-02, PNorm = 80.6529, GNorm = 0.6442, lr_0 = 2.8965e-04
Loss = 1.0366e-01, PNorm = 80.6593, GNorm = 0.7685, lr_0 = 2.8945e-04
Loss = 8.9962e-02, PNorm = 80.6677, GNorm = 0.5722, lr_0 = 2.8925e-04
Loss = 9.5536e-02, PNorm = 80.6745, GNorm = 0.5298, lr_0 = 2.8906e-04
Loss = 8.6149e-02, PNorm = 80.6809, GNorm = 0.5718, lr_0 = 2.8886e-04
Loss = 9.1561e-02, PNorm = 80.6846, GNorm = 0.4769, lr_0 = 2.8866e-04
Loss = 9.6899e-02, PNorm = 80.6904, GNorm = 0.5158, lr_0 = 2.8846e-04
Loss = 9.4364e-02, PNorm = 80.6911, GNorm = 0.6379, lr_0 = 2.8826e-04
Loss = 1.0557e-01, PNorm = 80.6997, GNorm = 0.7464, lr_0 = 2.8807e-04
Loss = 9.0298e-02, PNorm = 80.7048, GNorm = 0.4763, lr_0 = 2.8787e-04
Loss = 9.4147e-02, PNorm = 80.7134, GNorm = 0.7856, lr_0 = 2.8767e-04
Loss = 9.9779e-02, PNorm = 80.7189, GNorm = 0.6984, lr_0 = 2.8748e-04
Loss = 9.2484e-02, PNorm = 80.7250, GNorm = 0.6218, lr_0 = 2.8728e-04
Loss = 1.2911e-01, PNorm = 80.7349, GNorm = 0.8997, lr_0 = 2.8708e-04
Loss = 9.0335e-02, PNorm = 80.7426, GNorm = 0.4708, lr_0 = 2.8689e-04
Loss = 9.7178e-02, PNorm = 80.7501, GNorm = 0.5891, lr_0 = 2.8669e-04
Loss = 9.7228e-02, PNorm = 80.7535, GNorm = 0.5919, lr_0 = 2.8649e-04
Loss = 1.0986e-01, PNorm = 80.7609, GNorm = 0.6756, lr_0 = 2.8630e-04
Loss = 1.0564e-01, PNorm = 80.7715, GNorm = 0.8491, lr_0 = 2.8610e-04
Loss = 9.4948e-02, PNorm = 80.7799, GNorm = 0.6376, lr_0 = 2.8590e-04
Loss = 1.0354e-01, PNorm = 80.7892, GNorm = 0.6185, lr_0 = 2.8571e-04
Loss = 1.0241e-01, PNorm = 80.7941, GNorm = 0.7266, lr_0 = 2.8551e-04
Loss = 9.0516e-02, PNorm = 80.7999, GNorm = 0.5223, lr_0 = 2.8532e-04
Loss = 1.0847e-01, PNorm = 80.8058, GNorm = 1.0239, lr_0 = 2.8512e-04
Loss = 1.1063e-01, PNorm = 80.8083, GNorm = 0.6641, lr_0 = 2.8493e-04
Loss = 1.1260e-01, PNorm = 80.8169, GNorm = 0.5877, lr_0 = 2.8473e-04
Loss = 9.0387e-02, PNorm = 80.8252, GNorm = 0.8705, lr_0 = 2.8454e-04
Loss = 8.3289e-02, PNorm = 80.8285, GNorm = 0.6080, lr_0 = 2.8434e-04
Loss = 8.5867e-02, PNorm = 80.8299, GNorm = 0.6426, lr_0 = 2.8415e-04
Loss = 9.7630e-02, PNorm = 80.8343, GNorm = 0.5270, lr_0 = 2.8395e-04
Loss = 8.8945e-02, PNorm = 80.8393, GNorm = 0.6433, lr_0 = 2.8376e-04
Loss = 1.0485e-01, PNorm = 80.8441, GNorm = 0.8411, lr_0 = 2.8356e-04
Loss = 1.0389e-01, PNorm = 80.8507, GNorm = 0.5873, lr_0 = 2.8337e-04
Loss = 1.1256e-01, PNorm = 80.8590, GNorm = 0.7438, lr_0 = 2.8317e-04
Loss = 9.5248e-02, PNorm = 80.8680, GNorm = 0.5586, lr_0 = 2.8298e-04
Loss = 8.7697e-02, PNorm = 80.8719, GNorm = 0.5860, lr_0 = 2.8279e-04
Loss = 9.4055e-02, PNorm = 80.8786, GNorm = 0.7353, lr_0 = 2.8259e-04
Loss = 8.9500e-02, PNorm = 80.8838, GNorm = 0.4448, lr_0 = 2.8240e-04
Loss = 8.7086e-02, PNorm = 80.8891, GNorm = 0.6467, lr_0 = 2.8221e-04
Loss = 1.0452e-01, PNorm = 80.9010, GNorm = 0.8039, lr_0 = 2.8201e-04
Loss = 1.0156e-01, PNorm = 80.9076, GNorm = 0.6045, lr_0 = 2.8182e-04
Loss = 9.2329e-02, PNorm = 80.9131, GNorm = 1.3087, lr_0 = 2.8163e-04
Loss = 1.0291e-01, PNorm = 80.9188, GNorm = 0.5807, lr_0 = 2.8143e-04
Loss = 1.1335e-01, PNorm = 80.9281, GNorm = 0.5533, lr_0 = 2.8124e-04
Loss = 1.1067e-01, PNorm = 80.9369, GNorm = 0.6812, lr_0 = 2.8105e-04
Loss = 1.0707e-01, PNorm = 80.9460, GNorm = 0.5910, lr_0 = 2.8085e-04
Loss = 9.5487e-02, PNorm = 80.9506, GNorm = 0.6877, lr_0 = 2.8066e-04
Loss = 9.7943e-02, PNorm = 80.9535, GNorm = 0.5493, lr_0 = 2.8047e-04
Loss = 1.1234e-01, PNorm = 80.9628, GNorm = 0.8133, lr_0 = 2.8028e-04
Loss = 1.1110e-01, PNorm = 80.9683, GNorm = 0.6371, lr_0 = 2.8009e-04
Loss = 1.0616e-01, PNorm = 80.9734, GNorm = 0.9988, lr_0 = 2.7989e-04
Loss = 1.0507e-01, PNorm = 80.9783, GNorm = 0.6256, lr_0 = 2.7970e-04
Loss = 9.0859e-02, PNorm = 80.9843, GNorm = 0.6395, lr_0 = 2.7951e-04
Loss = 1.2276e-01, PNorm = 80.9906, GNorm = 0.7726, lr_0 = 2.7932e-04
Loss = 1.0998e-01, PNorm = 81.0014, GNorm = 0.9158, lr_0 = 2.7913e-04
Loss = 1.0169e-01, PNorm = 81.0053, GNorm = 1.1999, lr_0 = 2.7894e-04
Loss = 9.9313e-02, PNorm = 81.0118, GNorm = 1.0576, lr_0 = 2.7875e-04
Loss = 9.7140e-02, PNorm = 81.0207, GNorm = 0.6799, lr_0 = 2.7855e-04
Loss = 1.1126e-01, PNorm = 81.0302, GNorm = 1.4447, lr_0 = 2.7836e-04
Loss = 1.0200e-01, PNorm = 81.0355, GNorm = 0.6259, lr_0 = 2.7817e-04
Loss = 9.6531e-02, PNorm = 81.0423, GNorm = 0.5409, lr_0 = 2.7798e-04
Loss = 9.4831e-02, PNorm = 81.0469, GNorm = 0.4395, lr_0 = 2.7779e-04
Loss = 1.0596e-01, PNorm = 81.0505, GNorm = 0.7592, lr_0 = 2.7760e-04
Loss = 8.4523e-02, PNorm = 81.0573, GNorm = 0.4317, lr_0 = 2.7741e-04
Loss = 1.0466e-01, PNorm = 81.0633, GNorm = 0.5974, lr_0 = 2.7722e-04
Loss = 1.0202e-01, PNorm = 81.0695, GNorm = 0.7069, lr_0 = 2.7703e-04
Loss = 1.0187e-01, PNorm = 81.0777, GNorm = 0.5471, lr_0 = 2.7684e-04
Loss = 1.0872e-01, PNorm = 81.0821, GNorm = 0.7024, lr_0 = 2.7665e-04
Loss = 1.1225e-01, PNorm = 81.0865, GNorm = 0.6226, lr_0 = 2.7646e-04
Loss = 9.6902e-02, PNorm = 81.0912, GNorm = 0.5652, lr_0 = 2.7627e-04
Loss = 1.1983e-01, PNorm = 81.0953, GNorm = 0.6672, lr_0 = 2.7608e-04
Loss = 1.0070e-01, PNorm = 81.1017, GNorm = 0.4811, lr_0 = 2.7590e-04
Loss = 1.1615e-01, PNorm = 81.1105, GNorm = 0.6174, lr_0 = 2.7571e-04
Loss = 1.1341e-01, PNorm = 81.1153, GNorm = 0.7581, lr_0 = 2.7552e-04
Loss = 1.0506e-01, PNorm = 81.1186, GNorm = 0.7535, lr_0 = 2.7533e-04
Loss = 1.0390e-01, PNorm = 81.1220, GNorm = 0.7814, lr_0 = 2.7514e-04
Loss = 1.0951e-01, PNorm = 81.1259, GNorm = 0.6565, lr_0 = 2.7495e-04
Loss = 1.0092e-01, PNorm = 81.1329, GNorm = 0.6235, lr_0 = 2.7476e-04
Loss = 1.1999e-01, PNorm = 81.1387, GNorm = 0.7226, lr_0 = 2.7457e-04
Loss = 1.1572e-01, PNorm = 81.1478, GNorm = 0.7387, lr_0 = 2.7439e-04
Loss = 1.0091e-01, PNorm = 81.1550, GNorm = 0.7073, lr_0 = 2.7420e-04
Loss = 1.1553e-01, PNorm = 81.1594, GNorm = 0.5780, lr_0 = 2.7401e-04
Loss = 1.0076e-01, PNorm = 81.1654, GNorm = 0.7023, lr_0 = 2.7382e-04
Loss = 1.1353e-01, PNorm = 81.1713, GNorm = 0.5664, lr_0 = 2.7364e-04
Loss = 1.1598e-01, PNorm = 81.1756, GNorm = 0.5800, lr_0 = 2.7345e-04
Loss = 8.9784e-02, PNorm = 81.1813, GNorm = 0.7553, lr_0 = 2.7326e-04
Loss = 1.0325e-01, PNorm = 81.1863, GNorm = 0.5439, lr_0 = 2.7307e-04
Loss = 9.5877e-02, PNorm = 81.1919, GNorm = 0.4554, lr_0 = 2.7289e-04
Loss = 1.0217e-01, PNorm = 81.1969, GNorm = 0.6910, lr_0 = 2.7270e-04
Loss = 9.7918e-02, PNorm = 81.2045, GNorm = 1.3859, lr_0 = 2.7251e-04
Loss = 1.1015e-01, PNorm = 81.2063, GNorm = 0.6266, lr_0 = 2.7233e-04
Loss = 1.2240e-01, PNorm = 81.2156, GNorm = 0.7516, lr_0 = 2.7214e-04
Loss = 9.1959e-02, PNorm = 81.2271, GNorm = 0.9157, lr_0 = 2.7195e-04
Loss = 1.2105e-01, PNorm = 81.2342, GNorm = 0.9451, lr_0 = 2.7177e-04
Loss = 1.0524e-01, PNorm = 81.2398, GNorm = 0.9903, lr_0 = 2.7158e-04
Loss = 1.0377e-01, PNorm = 81.2460, GNorm = 0.9934, lr_0 = 2.7139e-04
Loss = 1.1278e-01, PNorm = 81.2515, GNorm = 0.6860, lr_0 = 2.7121e-04
Loss = 1.0560e-01, PNorm = 81.2577, GNorm = 0.7722, lr_0 = 2.7102e-04
Loss = 1.0936e-01, PNorm = 81.2650, GNorm = 0.6952, lr_0 = 2.7084e-04
Loss = 9.5951e-02, PNorm = 81.2725, GNorm = 0.5670, lr_0 = 2.7065e-04
Loss = 1.0268e-01, PNorm = 81.2793, GNorm = 0.5015, lr_0 = 2.7047e-04
Loss = 1.0064e-01, PNorm = 81.2853, GNorm = 0.5642, lr_0 = 2.7028e-04
Loss = 1.0103e-01, PNorm = 81.2866, GNorm = 0.5586, lr_0 = 2.7010e-04
Loss = 9.9942e-02, PNorm = 81.2915, GNorm = 0.5820, lr_0 = 2.6991e-04
Loss = 9.1006e-02, PNorm = 81.2924, GNorm = 0.7776, lr_0 = 2.6973e-04
Loss = 1.0267e-01, PNorm = 81.2946, GNorm = 0.5640, lr_0 = 2.6954e-04
Loss = 8.3095e-02, PNorm = 81.3007, GNorm = 0.5923, lr_0 = 2.6936e-04
Loss = 1.1136e-01, PNorm = 81.3076, GNorm = 0.6719, lr_0 = 2.6917e-04
Loss = 8.8263e-02, PNorm = 81.3136, GNorm = 0.7235, lr_0 = 2.6899e-04
Loss = 9.7893e-02, PNorm = 81.3140, GNorm = 0.7444, lr_0 = 2.6880e-04
Loss = 9.1146e-02, PNorm = 81.3171, GNorm = 0.8399, lr_0 = 2.6862e-04
Loss = 1.0407e-01, PNorm = 81.3228, GNorm = 0.5232, lr_0 = 2.6844e-04
Loss = 1.0354e-01, PNorm = 81.3263, GNorm = 0.5919, lr_0 = 2.6825e-04
Validation mae = 0.227656
Epoch 18
Loss = 1.0427e-01, PNorm = 81.3320, GNorm = 0.7439, lr_0 = 2.6807e-04
Loss = 8.6852e-02, PNorm = 81.3378, GNorm = 0.8264, lr_0 = 2.6788e-04
Loss = 9.5211e-02, PNorm = 81.3436, GNorm = 1.0129, lr_0 = 2.6770e-04
Loss = 1.0296e-01, PNorm = 81.3454, GNorm = 0.7112, lr_0 = 2.6752e-04
Loss = 8.9422e-02, PNorm = 81.3535, GNorm = 0.6159, lr_0 = 2.6733e-04
Loss = 1.0645e-01, PNorm = 81.3612, GNorm = 1.2481, lr_0 = 2.6715e-04
Loss = 1.0215e-01, PNorm = 81.3648, GNorm = 0.4235, lr_0 = 2.6697e-04
Loss = 8.9297e-02, PNorm = 81.3696, GNorm = 0.5515, lr_0 = 2.6678e-04
Loss = 9.1350e-02, PNorm = 81.3764, GNorm = 0.7149, lr_0 = 2.6660e-04
Loss = 9.0568e-02, PNorm = 81.3856, GNorm = 0.9976, lr_0 = 2.6642e-04
Loss = 8.4686e-02, PNorm = 81.3894, GNorm = 0.5548, lr_0 = 2.6624e-04
Loss = 9.4320e-02, PNorm = 81.3963, GNorm = 0.5110, lr_0 = 2.6605e-04
Loss = 1.1167e-01, PNorm = 81.4008, GNorm = 1.0988, lr_0 = 2.6587e-04
Loss = 1.1483e-01, PNorm = 81.4068, GNorm = 0.5915, lr_0 = 2.6569e-04
Loss = 1.0004e-01, PNorm = 81.4108, GNorm = 0.6209, lr_0 = 2.6551e-04
Loss = 9.5687e-02, PNorm = 81.4126, GNorm = 0.5187, lr_0 = 2.6533e-04
Loss = 1.0416e-01, PNorm = 81.4198, GNorm = 0.5657, lr_0 = 2.6514e-04
Loss = 8.9701e-02, PNorm = 81.4238, GNorm = 0.7300, lr_0 = 2.6496e-04
Loss = 9.0641e-02, PNorm = 81.4298, GNorm = 0.9293, lr_0 = 2.6478e-04
Loss = 1.0864e-01, PNorm = 81.4392, GNorm = 0.8613, lr_0 = 2.6460e-04
Loss = 1.0288e-01, PNorm = 81.4488, GNorm = 0.5222, lr_0 = 2.6442e-04
Loss = 8.4861e-02, PNorm = 81.4582, GNorm = 0.5956, lr_0 = 2.6424e-04
Loss = 1.0004e-01, PNorm = 81.4627, GNorm = 0.5337, lr_0 = 2.6406e-04
Loss = 1.0692e-01, PNorm = 81.4695, GNorm = 0.8161, lr_0 = 2.6388e-04
Loss = 9.1467e-02, PNorm = 81.4736, GNorm = 0.5424, lr_0 = 2.6369e-04
Loss = 1.0751e-01, PNorm = 81.4748, GNorm = 0.7114, lr_0 = 2.6351e-04
Loss = 9.2297e-02, PNorm = 81.4810, GNorm = 1.2314, lr_0 = 2.6333e-04
Loss = 9.4007e-02, PNorm = 81.4848, GNorm = 0.7429, lr_0 = 2.6315e-04
Loss = 9.9753e-02, PNorm = 81.4889, GNorm = 0.6194, lr_0 = 2.6297e-04
Loss = 9.4900e-02, PNorm = 81.4914, GNorm = 0.8068, lr_0 = 2.6279e-04
Loss = 1.0579e-01, PNorm = 81.4992, GNorm = 0.7459, lr_0 = 2.6261e-04
Loss = 9.0379e-02, PNorm = 81.5046, GNorm = 0.7139, lr_0 = 2.6243e-04
Loss = 9.9125e-02, PNorm = 81.5089, GNorm = 0.8418, lr_0 = 2.6225e-04
Loss = 9.6845e-02, PNorm = 81.5147, GNorm = 1.0773, lr_0 = 2.6207e-04
Loss = 9.3257e-02, PNorm = 81.5234, GNorm = 0.7011, lr_0 = 2.6189e-04
Loss = 8.8682e-02, PNorm = 81.5326, GNorm = 0.5472, lr_0 = 2.6171e-04
Loss = 9.0718e-02, PNorm = 81.5363, GNorm = 0.4524, lr_0 = 2.6153e-04
Loss = 8.6357e-02, PNorm = 81.5419, GNorm = 0.5671, lr_0 = 2.6136e-04
Loss = 1.0699e-01, PNorm = 81.5499, GNorm = 0.5263, lr_0 = 2.6118e-04
Loss = 8.9117e-02, PNorm = 81.5558, GNorm = 0.5398, lr_0 = 2.6100e-04
Loss = 9.4610e-02, PNorm = 81.5587, GNorm = 0.7658, lr_0 = 2.6082e-04
Loss = 9.8219e-02, PNorm = 81.5639, GNorm = 0.6502, lr_0 = 2.6064e-04
Loss = 9.1113e-02, PNorm = 81.5699, GNorm = 0.7529, lr_0 = 2.6046e-04
Loss = 9.7084e-02, PNorm = 81.5723, GNorm = 0.5307, lr_0 = 2.6028e-04
Loss = 9.8101e-02, PNorm = 81.5760, GNorm = 0.5517, lr_0 = 2.6011e-04
Loss = 9.9971e-02, PNorm = 81.5814, GNorm = 0.6352, lr_0 = 2.5993e-04
Loss = 9.3527e-02, PNorm = 81.5870, GNorm = 0.5229, lr_0 = 2.5975e-04
Loss = 1.1124e-01, PNorm = 81.5927, GNorm = 0.6764, lr_0 = 2.5957e-04
Loss = 9.7114e-02, PNorm = 81.5978, GNorm = 0.8603, lr_0 = 2.5939e-04
Loss = 8.9918e-02, PNorm = 81.6041, GNorm = 0.6004, lr_0 = 2.5922e-04
Loss = 9.9014e-02, PNorm = 81.6089, GNorm = 1.1115, lr_0 = 2.5904e-04
Loss = 9.4409e-02, PNorm = 81.6124, GNorm = 0.6480, lr_0 = 2.5886e-04
Loss = 9.9986e-02, PNorm = 81.6153, GNorm = 0.5182, lr_0 = 2.5868e-04
Loss = 1.0992e-01, PNorm = 81.6194, GNorm = 0.6877, lr_0 = 2.5851e-04
Loss = 9.4913e-02, PNorm = 81.6239, GNorm = 0.5750, lr_0 = 2.5833e-04
Loss = 9.5965e-02, PNorm = 81.6334, GNorm = 0.5045, lr_0 = 2.5815e-04
Loss = 9.2347e-02, PNorm = 81.6404, GNorm = 0.5203, lr_0 = 2.5797e-04
Loss = 9.5302e-02, PNorm = 81.6438, GNorm = 0.4892, lr_0 = 2.5780e-04
Loss = 8.3861e-02, PNorm = 81.6478, GNorm = 0.6386, lr_0 = 2.5762e-04
Loss = 1.0024e-01, PNorm = 81.6486, GNorm = 0.5373, lr_0 = 2.5745e-04
Loss = 9.9922e-02, PNorm = 81.6493, GNorm = 0.7382, lr_0 = 2.5727e-04
Loss = 1.0464e-01, PNorm = 81.6560, GNorm = 1.2372, lr_0 = 2.5709e-04
Loss = 9.4728e-02, PNorm = 81.6593, GNorm = 0.4492, lr_0 = 2.5692e-04
Loss = 9.3491e-02, PNorm = 81.6619, GNorm = 0.5798, lr_0 = 2.5674e-04
Loss = 9.9331e-02, PNorm = 81.6625, GNorm = 0.5257, lr_0 = 2.5656e-04
Loss = 8.9364e-02, PNorm = 81.6664, GNorm = 0.6499, lr_0 = 2.5639e-04
Loss = 1.0203e-01, PNorm = 81.6696, GNorm = 0.5388, lr_0 = 2.5621e-04
Loss = 8.4918e-02, PNorm = 81.6745, GNorm = 0.4286, lr_0 = 2.5604e-04
Loss = 1.0704e-01, PNorm = 81.6787, GNorm = 0.6662, lr_0 = 2.5586e-04
Loss = 9.6928e-02, PNorm = 81.6877, GNorm = 0.6021, lr_0 = 2.5569e-04
Loss = 9.8527e-02, PNorm = 81.6908, GNorm = 0.8434, lr_0 = 2.5551e-04
Loss = 8.8427e-02, PNorm = 81.6967, GNorm = 0.7002, lr_0 = 2.5534e-04
Loss = 9.4494e-02, PNorm = 81.7039, GNorm = 0.6584, lr_0 = 2.5516e-04
Loss = 9.5623e-02, PNorm = 81.7068, GNorm = 0.7945, lr_0 = 2.5499e-04
Loss = 9.8157e-02, PNorm = 81.7127, GNorm = 0.4919, lr_0 = 2.5481e-04
Loss = 1.0416e-01, PNorm = 81.7199, GNorm = 0.9609, lr_0 = 2.5464e-04
Loss = 8.6752e-02, PNorm = 81.7234, GNorm = 0.7115, lr_0 = 2.5446e-04
Loss = 8.0378e-02, PNorm = 81.7272, GNorm = 0.7245, lr_0 = 2.5429e-04
Loss = 9.7865e-02, PNorm = 81.7337, GNorm = 0.5243, lr_0 = 2.5411e-04
Loss = 9.4091e-02, PNorm = 81.7416, GNorm = 0.6624, lr_0 = 2.5394e-04
Loss = 8.9626e-02, PNorm = 81.7494, GNorm = 0.6139, lr_0 = 2.5377e-04
Loss = 8.8752e-02, PNorm = 81.7527, GNorm = 0.6981, lr_0 = 2.5359e-04
Loss = 1.1018e-01, PNorm = 81.7559, GNorm = 0.5753, lr_0 = 2.5342e-04
Loss = 9.0002e-02, PNorm = 81.7590, GNorm = 0.7339, lr_0 = 2.5325e-04
Loss = 8.7206e-02, PNorm = 81.7625, GNorm = 0.4887, lr_0 = 2.5307e-04
Loss = 9.6634e-02, PNorm = 81.7675, GNorm = 0.9210, lr_0 = 2.5290e-04
Loss = 1.0350e-01, PNorm = 81.7745, GNorm = 0.5910, lr_0 = 2.5273e-04
Loss = 9.6183e-02, PNorm = 81.7765, GNorm = 0.7971, lr_0 = 2.5255e-04
Loss = 1.0944e-01, PNorm = 81.7800, GNorm = 0.5762, lr_0 = 2.5238e-04
Loss = 1.0398e-01, PNorm = 81.7881, GNorm = 0.5555, lr_0 = 2.5221e-04
Loss = 1.0352e-01, PNorm = 81.7955, GNorm = 0.7211, lr_0 = 2.5203e-04
Loss = 1.1212e-01, PNorm = 81.8013, GNorm = 0.6073, lr_0 = 2.5186e-04
Loss = 7.9870e-02, PNorm = 81.8097, GNorm = 0.5965, lr_0 = 2.5169e-04
Loss = 8.3817e-02, PNorm = 81.8147, GNorm = 0.5408, lr_0 = 2.5152e-04
Loss = 1.0417e-01, PNorm = 81.8172, GNorm = 0.5934, lr_0 = 2.5134e-04
Loss = 9.1987e-02, PNorm = 81.8221, GNorm = 0.9054, lr_0 = 2.5117e-04
Loss = 8.8225e-02, PNorm = 81.8295, GNorm = 0.9661, lr_0 = 2.5100e-04
Loss = 9.9240e-02, PNorm = 81.8351, GNorm = 0.7631, lr_0 = 2.5083e-04
Loss = 9.6534e-02, PNorm = 81.8379, GNorm = 0.5198, lr_0 = 2.5066e-04
Loss = 8.8698e-02, PNorm = 81.8425, GNorm = 0.7237, lr_0 = 2.5048e-04
Loss = 9.8780e-02, PNorm = 81.8440, GNorm = 0.6442, lr_0 = 2.5031e-04
Loss = 1.0603e-01, PNorm = 81.8460, GNorm = 0.7681, lr_0 = 2.5014e-04
Loss = 9.1743e-02, PNorm = 81.8510, GNorm = 0.7622, lr_0 = 2.4997e-04
Loss = 9.9551e-02, PNorm = 81.8559, GNorm = 0.6874, lr_0 = 2.4980e-04
Loss = 1.0601e-01, PNorm = 81.8597, GNorm = 0.6007, lr_0 = 2.4963e-04
Loss = 1.0378e-01, PNorm = 81.8644, GNorm = 0.6184, lr_0 = 2.4946e-04
Loss = 1.1116e-01, PNorm = 81.8694, GNorm = 0.4792, lr_0 = 2.4929e-04
Loss = 1.0031e-01, PNorm = 81.8729, GNorm = 0.6323, lr_0 = 2.4911e-04
Loss = 1.0430e-01, PNorm = 81.8767, GNorm = 1.0152, lr_0 = 2.4894e-04
Loss = 1.0325e-01, PNorm = 81.8802, GNorm = 0.5366, lr_0 = 2.4877e-04
Loss = 9.8035e-02, PNorm = 81.8843, GNorm = 0.7014, lr_0 = 2.4860e-04
Loss = 9.9626e-02, PNorm = 81.8888, GNorm = 0.7339, lr_0 = 2.4843e-04
Loss = 8.6243e-02, PNorm = 81.8940, GNorm = 0.7256, lr_0 = 2.4826e-04
Loss = 1.0796e-01, PNorm = 81.9001, GNorm = 1.2315, lr_0 = 2.4809e-04
Loss = 8.7758e-02, PNorm = 81.9059, GNorm = 0.7061, lr_0 = 2.4792e-04
Loss = 1.0344e-01, PNorm = 81.9130, GNorm = 0.7046, lr_0 = 2.4775e-04
Loss = 9.3796e-02, PNorm = 81.9184, GNorm = 0.6107, lr_0 = 2.4758e-04
Loss = 8.8948e-02, PNorm = 81.9236, GNorm = 0.5356, lr_0 = 2.4741e-04
Loss = 9.6325e-02, PNorm = 81.9276, GNorm = 0.6840, lr_0 = 2.4724e-04
Loss = 1.1370e-01, PNorm = 81.9300, GNorm = 0.6842, lr_0 = 2.4707e-04
Validation mae = 0.227759
Epoch 19
Loss = 8.6521e-02, PNorm = 81.9354, GNorm = 0.5550, lr_0 = 2.4690e-04
Loss = 7.9101e-02, PNorm = 81.9379, GNorm = 0.6837, lr_0 = 2.4674e-04
Loss = 8.9604e-02, PNorm = 81.9414, GNorm = 1.0208, lr_0 = 2.4657e-04
Loss = 8.4137e-02, PNorm = 81.9464, GNorm = 0.5584, lr_0 = 2.4640e-04
Loss = 9.6913e-02, PNorm = 81.9518, GNorm = 0.5059, lr_0 = 2.4623e-04
Loss = 9.2644e-02, PNorm = 81.9548, GNorm = 0.5013, lr_0 = 2.4606e-04
Loss = 8.9080e-02, PNorm = 81.9585, GNorm = 0.4633, lr_0 = 2.4589e-04
Loss = 8.5872e-02, PNorm = 81.9630, GNorm = 0.5807, lr_0 = 2.4572e-04
Loss = 9.7249e-02, PNorm = 81.9703, GNorm = 0.5470, lr_0 = 2.4556e-04
Loss = 9.5985e-02, PNorm = 81.9776, GNorm = 0.5915, lr_0 = 2.4539e-04
Loss = 9.2901e-02, PNorm = 81.9872, GNorm = 0.7553, lr_0 = 2.4522e-04
Loss = 9.7991e-02, PNorm = 81.9901, GNorm = 0.7668, lr_0 = 2.4505e-04
Loss = 9.2048e-02, PNorm = 81.9951, GNorm = 0.6052, lr_0 = 2.4488e-04
Loss = 1.0183e-01, PNorm = 82.0005, GNorm = 0.6719, lr_0 = 2.4472e-04
Loss = 9.0542e-02, PNorm = 82.0024, GNorm = 0.7096, lr_0 = 2.4455e-04
Loss = 1.0956e-01, PNorm = 82.0042, GNorm = 0.6258, lr_0 = 2.4438e-04
Loss = 7.9630e-02, PNorm = 82.0069, GNorm = 0.4845, lr_0 = 2.4421e-04
Loss = 1.0776e-01, PNorm = 82.0131, GNorm = 0.6420, lr_0 = 2.4405e-04
Loss = 9.3911e-02, PNorm = 82.0152, GNorm = 0.5843, lr_0 = 2.4388e-04
Loss = 8.0431e-02, PNorm = 82.0203, GNorm = 0.4467, lr_0 = 2.4371e-04
Loss = 8.3147e-02, PNorm = 82.0240, GNorm = 0.5825, lr_0 = 2.4354e-04
Loss = 1.0775e-01, PNorm = 82.0307, GNorm = 1.0987, lr_0 = 2.4338e-04
Loss = 1.0103e-01, PNorm = 82.0373, GNorm = 0.6697, lr_0 = 2.4321e-04
Loss = 9.2008e-02, PNorm = 82.0428, GNorm = 0.7687, lr_0 = 2.4304e-04
Loss = 9.6475e-02, PNorm = 82.0459, GNorm = 0.6639, lr_0 = 2.4288e-04
Loss = 9.8621e-02, PNorm = 82.0507, GNorm = 0.6629, lr_0 = 2.4271e-04
Loss = 9.0154e-02, PNorm = 82.0578, GNorm = 0.5729, lr_0 = 2.4254e-04
Loss = 9.6325e-02, PNorm = 82.0632, GNorm = 0.5902, lr_0 = 2.4238e-04
Loss = 9.2429e-02, PNorm = 82.0649, GNorm = 0.6391, lr_0 = 2.4221e-04
Loss = 8.7890e-02, PNorm = 82.0693, GNorm = 0.8457, lr_0 = 2.4205e-04
Loss = 9.0606e-02, PNorm = 82.0734, GNorm = 0.6961, lr_0 = 2.4188e-04
Loss = 9.1764e-02, PNorm = 82.0754, GNorm = 0.6765, lr_0 = 2.4171e-04
Loss = 8.3278e-02, PNorm = 82.0799, GNorm = 0.6602, lr_0 = 2.4155e-04
Loss = 9.8563e-02, PNorm = 82.0841, GNorm = 0.8567, lr_0 = 2.4138e-04
Loss = 9.7087e-02, PNorm = 82.0886, GNorm = 0.5164, lr_0 = 2.4122e-04
Loss = 9.6774e-02, PNorm = 82.0937, GNorm = 0.5598, lr_0 = 2.4105e-04
Loss = 8.9765e-02, PNorm = 82.0978, GNorm = 0.5339, lr_0 = 2.4089e-04
Loss = 9.0076e-02, PNorm = 82.0986, GNorm = 0.6388, lr_0 = 2.4072e-04
Loss = 8.4646e-02, PNorm = 82.1015, GNorm = 0.4528, lr_0 = 2.4056e-04
Loss = 9.3441e-02, PNorm = 82.1040, GNorm = 1.0600, lr_0 = 2.4039e-04
Loss = 8.3684e-02, PNorm = 82.1078, GNorm = 0.7686, lr_0 = 2.4023e-04
Loss = 9.7551e-02, PNorm = 82.1105, GNorm = 0.6875, lr_0 = 2.4006e-04
Loss = 8.8220e-02, PNorm = 82.1145, GNorm = 0.5757, lr_0 = 2.3990e-04
Loss = 1.0177e-01, PNorm = 82.1218, GNorm = 0.7539, lr_0 = 2.3974e-04
Loss = 1.1713e-01, PNorm = 82.1304, GNorm = 0.6536, lr_0 = 2.3957e-04
Loss = 9.1701e-02, PNorm = 82.1350, GNorm = 0.6964, lr_0 = 2.3941e-04
Loss = 1.0184e-01, PNorm = 82.1398, GNorm = 1.1008, lr_0 = 2.3924e-04
Loss = 9.5124e-02, PNorm = 82.1414, GNorm = 0.5681, lr_0 = 2.3908e-04
Loss = 1.0160e-01, PNorm = 82.1447, GNorm = 0.9406, lr_0 = 2.3892e-04
Loss = 9.4965e-02, PNorm = 82.1488, GNorm = 0.6351, lr_0 = 2.3875e-04
Loss = 8.3584e-02, PNorm = 82.1564, GNorm = 0.6868, lr_0 = 2.3859e-04
Loss = 9.7433e-02, PNorm = 82.1614, GNorm = 0.8430, lr_0 = 2.3842e-04
Loss = 8.9162e-02, PNorm = 82.1661, GNorm = 0.6012, lr_0 = 2.3826e-04
Loss = 8.9018e-02, PNorm = 82.1711, GNorm = 0.7763, lr_0 = 2.3810e-04
Loss = 1.0155e-01, PNorm = 82.1779, GNorm = 0.8902, lr_0 = 2.3794e-04
Loss = 1.0042e-01, PNorm = 82.1790, GNorm = 0.6766, lr_0 = 2.3777e-04
Loss = 9.6593e-02, PNorm = 82.1824, GNorm = 0.7068, lr_0 = 2.3761e-04
Loss = 9.6153e-02, PNorm = 82.1880, GNorm = 1.0219, lr_0 = 2.3745e-04
Loss = 8.8701e-02, PNorm = 82.1904, GNorm = 0.7694, lr_0 = 2.3728e-04
Loss = 9.5022e-02, PNorm = 82.1954, GNorm = 0.6294, lr_0 = 2.3712e-04
Loss = 8.1844e-02, PNorm = 82.2013, GNorm = 0.5689, lr_0 = 2.3696e-04
Loss = 9.7510e-02, PNorm = 82.2074, GNorm = 0.7716, lr_0 = 2.3680e-04
Loss = 1.0298e-01, PNorm = 82.2125, GNorm = 0.8075, lr_0 = 2.3663e-04
Loss = 9.5745e-02, PNorm = 82.2141, GNorm = 0.4591, lr_0 = 2.3647e-04
Loss = 1.0896e-01, PNorm = 82.2186, GNorm = 0.7086, lr_0 = 2.3631e-04
Loss = 8.3403e-02, PNorm = 82.2212, GNorm = 0.5605, lr_0 = 2.3615e-04
Loss = 9.7354e-02, PNorm = 82.2257, GNorm = 0.7388, lr_0 = 2.3599e-04
Loss = 1.0123e-01, PNorm = 82.2308, GNorm = 0.6395, lr_0 = 2.3582e-04
Loss = 9.7915e-02, PNorm = 82.2367, GNorm = 0.6937, lr_0 = 2.3566e-04
Loss = 1.0323e-01, PNorm = 82.2440, GNorm = 0.6247, lr_0 = 2.3550e-04
Loss = 8.9811e-02, PNorm = 82.2469, GNorm = 0.5388, lr_0 = 2.3534e-04
Loss = 9.3406e-02, PNorm = 82.2518, GNorm = 0.5186, lr_0 = 2.3518e-04
Loss = 1.0141e-01, PNorm = 82.2561, GNorm = 0.6038, lr_0 = 2.3502e-04
Loss = 1.0600e-01, PNorm = 82.2625, GNorm = 0.8782, lr_0 = 2.3486e-04
Loss = 9.3690e-02, PNorm = 82.2662, GNorm = 0.6865, lr_0 = 2.3470e-04
Loss = 8.9111e-02, PNorm = 82.2680, GNorm = 0.7586, lr_0 = 2.3454e-04
Loss = 8.7008e-02, PNorm = 82.2727, GNorm = 0.7324, lr_0 = 2.3437e-04
Loss = 9.0291e-02, PNorm = 82.2788, GNorm = 1.0113, lr_0 = 2.3421e-04
Loss = 9.5164e-02, PNorm = 82.2804, GNorm = 0.7996, lr_0 = 2.3405e-04
Loss = 8.4864e-02, PNorm = 82.2857, GNorm = 0.5498, lr_0 = 2.3389e-04
Loss = 8.9162e-02, PNorm = 82.2915, GNorm = 0.8513, lr_0 = 2.3373e-04
Loss = 9.2898e-02, PNorm = 82.2953, GNorm = 0.8714, lr_0 = 2.3357e-04
Loss = 1.1797e-01, PNorm = 82.3009, GNorm = 0.6725, lr_0 = 2.3341e-04
Loss = 1.0113e-01, PNorm = 82.3051, GNorm = 0.8205, lr_0 = 2.3325e-04
Loss = 9.8042e-02, PNorm = 82.3128, GNorm = 0.5030, lr_0 = 2.3309e-04
Loss = 9.1005e-02, PNorm = 82.3187, GNorm = 0.5693, lr_0 = 2.3293e-04
Loss = 9.0647e-02, PNorm = 82.3215, GNorm = 0.7926, lr_0 = 2.3277e-04
Loss = 9.4902e-02, PNorm = 82.3237, GNorm = 0.6081, lr_0 = 2.3261e-04
Loss = 7.3335e-02, PNorm = 82.3272, GNorm = 0.5407, lr_0 = 2.3246e-04
Loss = 9.9409e-02, PNorm = 82.3313, GNorm = 1.0762, lr_0 = 2.3230e-04
Loss = 9.7345e-02, PNorm = 82.3362, GNorm = 0.5331, lr_0 = 2.3214e-04
Loss = 1.1333e-01, PNorm = 82.3446, GNorm = 0.7189, lr_0 = 2.3198e-04
Loss = 8.8129e-02, PNorm = 82.3493, GNorm = 0.6221, lr_0 = 2.3182e-04
Loss = 9.2726e-02, PNorm = 82.3555, GNorm = 0.6543, lr_0 = 2.3166e-04
Loss = 8.7822e-02, PNorm = 82.3616, GNorm = 0.7176, lr_0 = 2.3150e-04
Loss = 1.0653e-01, PNorm = 82.3664, GNorm = 0.5536, lr_0 = 2.3134e-04
Loss = 9.8510e-02, PNorm = 82.3710, GNorm = 0.6936, lr_0 = 2.3118e-04
Loss = 9.2839e-02, PNorm = 82.3764, GNorm = 0.6011, lr_0 = 2.3103e-04
Loss = 9.2625e-02, PNorm = 82.3805, GNorm = 0.5197, lr_0 = 2.3087e-04
Loss = 8.4028e-02, PNorm = 82.3875, GNorm = 0.6700, lr_0 = 2.3071e-04
Loss = 9.5023e-02, PNorm = 82.3932, GNorm = 0.6907, lr_0 = 2.3055e-04
Loss = 9.6866e-02, PNorm = 82.3963, GNorm = 0.7623, lr_0 = 2.3039e-04
Loss = 1.0855e-01, PNorm = 82.3971, GNorm = 0.7290, lr_0 = 2.3024e-04
Loss = 9.8575e-02, PNorm = 82.3988, GNorm = 0.5356, lr_0 = 2.3008e-04
Loss = 1.0426e-01, PNorm = 82.4024, GNorm = 0.6171, lr_0 = 2.2992e-04
Loss = 1.0242e-01, PNorm = 82.4068, GNorm = 0.8254, lr_0 = 2.2976e-04
Loss = 9.6709e-02, PNorm = 82.4089, GNorm = 0.6687, lr_0 = 2.2961e-04
Loss = 1.0441e-01, PNorm = 82.4143, GNorm = 0.7506, lr_0 = 2.2945e-04
Loss = 9.5551e-02, PNorm = 82.4174, GNorm = 0.8399, lr_0 = 2.2929e-04
Loss = 9.0666e-02, PNorm = 82.4226, GNorm = 0.9204, lr_0 = 2.2913e-04
Loss = 9.9384e-02, PNorm = 82.4287, GNorm = 0.7656, lr_0 = 2.2898e-04
Loss = 9.5682e-02, PNorm = 82.4355, GNorm = 0.7185, lr_0 = 2.2882e-04
Loss = 8.6005e-02, PNorm = 82.4380, GNorm = 0.6798, lr_0 = 2.2866e-04
Loss = 9.6751e-02, PNorm = 82.4425, GNorm = 0.5677, lr_0 = 2.2851e-04
Loss = 8.8986e-02, PNorm = 82.4465, GNorm = 0.6199, lr_0 = 2.2835e-04
Loss = 9.3711e-02, PNorm = 82.4487, GNorm = 0.5951, lr_0 = 2.2819e-04
Loss = 9.4426e-02, PNorm = 82.4528, GNorm = 0.6006, lr_0 = 2.2804e-04
Loss = 1.1258e-01, PNorm = 82.4574, GNorm = 0.6128, lr_0 = 2.2788e-04
Loss = 9.6796e-02, PNorm = 82.4614, GNorm = 0.9026, lr_0 = 2.2773e-04
Loss = 9.6746e-02, PNorm = 82.4664, GNorm = 0.6409, lr_0 = 2.2757e-04
Validation mae = 0.229791
Epoch 20
Loss = 7.9309e-02, PNorm = 82.4685, GNorm = 0.7072, lr_0 = 2.2741e-04
Loss = 9.3058e-02, PNorm = 82.4746, GNorm = 0.4211, lr_0 = 2.2726e-04
Loss = 7.8173e-02, PNorm = 82.4803, GNorm = 0.7259, lr_0 = 2.2710e-04
Loss = 9.2111e-02, PNorm = 82.4865, GNorm = 0.8452, lr_0 = 2.2695e-04
Loss = 8.0035e-02, PNorm = 82.4902, GNorm = 0.6812, lr_0 = 2.2679e-04
Loss = 9.8226e-02, PNorm = 82.4929, GNorm = 0.6010, lr_0 = 2.2664e-04
Loss = 9.4361e-02, PNorm = 82.4965, GNorm = 0.5821, lr_0 = 2.2648e-04
Loss = 8.6981e-02, PNorm = 82.4993, GNorm = 1.0476, lr_0 = 2.2632e-04
Loss = 9.7752e-02, PNorm = 82.5054, GNorm = 0.6733, lr_0 = 2.2617e-04
Loss = 8.1365e-02, PNorm = 82.5079, GNorm = 0.5830, lr_0 = 2.2601e-04
Loss = 8.1819e-02, PNorm = 82.5113, GNorm = 0.6749, lr_0 = 2.2586e-04
Loss = 9.0983e-02, PNorm = 82.5161, GNorm = 0.5157, lr_0 = 2.2571e-04
Loss = 8.3126e-02, PNorm = 82.5218, GNorm = 0.6516, lr_0 = 2.2555e-04
Loss = 8.5591e-02, PNorm = 82.5247, GNorm = 0.6703, lr_0 = 2.2540e-04
Loss = 9.7259e-02, PNorm = 82.5278, GNorm = 0.6233, lr_0 = 2.2524e-04
Loss = 1.0359e-01, PNorm = 82.5334, GNorm = 0.7815, lr_0 = 2.2509e-04
Loss = 9.4334e-02, PNorm = 82.5389, GNorm = 0.5783, lr_0 = 2.2493e-04
Loss = 8.3590e-02, PNorm = 82.5421, GNorm = 0.7304, lr_0 = 2.2478e-04
Loss = 8.5388e-02, PNorm = 82.5473, GNorm = 0.5359, lr_0 = 2.2463e-04
Loss = 8.9113e-02, PNorm = 82.5541, GNorm = 0.5906, lr_0 = 2.2447e-04
Loss = 8.8303e-02, PNorm = 82.5568, GNorm = 0.9112, lr_0 = 2.2432e-04
Loss = 7.5223e-02, PNorm = 82.5573, GNorm = 0.5413, lr_0 = 2.2416e-04
Loss = 8.9427e-02, PNorm = 82.5646, GNorm = 0.5992, lr_0 = 2.2401e-04
Loss = 9.0174e-02, PNorm = 82.5716, GNorm = 0.8100, lr_0 = 2.2386e-04
Loss = 1.0141e-01, PNorm = 82.5746, GNorm = 0.5933, lr_0 = 2.2370e-04
Loss = 8.9424e-02, PNorm = 82.5763, GNorm = 0.6332, lr_0 = 2.2355e-04
Loss = 9.8332e-02, PNorm = 82.5827, GNorm = 0.8221, lr_0 = 2.2340e-04
Loss = 9.0125e-02, PNorm = 82.5881, GNorm = 0.6198, lr_0 = 2.2324e-04
Loss = 9.1867e-02, PNorm = 82.5945, GNorm = 0.6399, lr_0 = 2.2309e-04
Loss = 7.9240e-02, PNorm = 82.5970, GNorm = 0.5553, lr_0 = 2.2294e-04
Loss = 7.9949e-02, PNorm = 82.6004, GNorm = 0.8871, lr_0 = 2.2279e-04
Loss = 8.2290e-02, PNorm = 82.6045, GNorm = 0.5504, lr_0 = 2.2263e-04
Loss = 8.5681e-02, PNorm = 82.6111, GNorm = 0.6334, lr_0 = 2.2248e-04
Loss = 1.0009e-01, PNorm = 82.6138, GNorm = 0.5937, lr_0 = 2.2233e-04
Loss = 8.0052e-02, PNorm = 82.6168, GNorm = 0.7537, lr_0 = 2.2218e-04
Loss = 8.5511e-02, PNorm = 82.6218, GNorm = 0.8880, lr_0 = 2.2202e-04
Loss = 9.4008e-02, PNorm = 82.6259, GNorm = 0.5653, lr_0 = 2.2187e-04
Loss = 9.6691e-02, PNorm = 82.6317, GNorm = 0.5150, lr_0 = 2.2172e-04
Loss = 9.5798e-02, PNorm = 82.6371, GNorm = 0.6292, lr_0 = 2.2157e-04
Loss = 1.1050e-01, PNorm = 82.6437, GNorm = 0.6179, lr_0 = 2.2142e-04
Loss = 9.0582e-02, PNorm = 82.6489, GNorm = 0.6058, lr_0 = 2.2126e-04
Loss = 8.7365e-02, PNorm = 82.6541, GNorm = 0.5708, lr_0 = 2.2111e-04
Loss = 9.4281e-02, PNorm = 82.6599, GNorm = 0.6398, lr_0 = 2.2096e-04
Loss = 9.4569e-02, PNorm = 82.6613, GNorm = 0.7383, lr_0 = 2.2081e-04
Loss = 8.0469e-02, PNorm = 82.6651, GNorm = 0.5213, lr_0 = 2.2066e-04
Loss = 9.0561e-02, PNorm = 82.6653, GNorm = 0.6966, lr_0 = 2.2051e-04
Loss = 1.0162e-01, PNorm = 82.6692, GNorm = 0.7982, lr_0 = 2.2036e-04
Loss = 9.5773e-02, PNorm = 82.6748, GNorm = 1.0126, lr_0 = 2.2021e-04
Loss = 9.8729e-02, PNorm = 82.6803, GNorm = 0.7999, lr_0 = 2.2005e-04
Loss = 9.0274e-02, PNorm = 82.6865, GNorm = 0.7036, lr_0 = 2.1990e-04
Loss = 8.5395e-02, PNorm = 82.6912, GNorm = 0.6825, lr_0 = 2.1975e-04
Loss = 9.7720e-02, PNorm = 82.6966, GNorm = 0.5691, lr_0 = 2.1960e-04
Loss = 9.9872e-02, PNorm = 82.7013, GNorm = 0.5491, lr_0 = 2.1945e-04
Loss = 9.5665e-02, PNorm = 82.7075, GNorm = 0.6099, lr_0 = 2.1930e-04
Loss = 8.9465e-02, PNorm = 82.7112, GNorm = 0.6840, lr_0 = 2.1915e-04
Loss = 8.8063e-02, PNorm = 82.7125, GNorm = 0.9831, lr_0 = 2.1900e-04
Loss = 9.4505e-02, PNorm = 82.7130, GNorm = 0.5279, lr_0 = 2.1885e-04
Loss = 9.5615e-02, PNorm = 82.7194, GNorm = 0.8817, lr_0 = 2.1870e-04
Loss = 1.0013e-01, PNorm = 82.7254, GNorm = 0.8093, lr_0 = 2.1855e-04
Loss = 9.7327e-02, PNorm = 82.7288, GNorm = 0.5831, lr_0 = 2.1840e-04
Loss = 9.5013e-02, PNorm = 82.7350, GNorm = 0.5507, lr_0 = 2.1825e-04
Loss = 8.4115e-02, PNorm = 82.7395, GNorm = 0.7109, lr_0 = 2.1810e-04
Loss = 1.0087e-01, PNorm = 82.7417, GNorm = 0.6588, lr_0 = 2.1795e-04
Loss = 9.2178e-02, PNorm = 82.7454, GNorm = 0.8929, lr_0 = 2.1780e-04
Loss = 8.5064e-02, PNorm = 82.7495, GNorm = 0.5105, lr_0 = 2.1765e-04
Loss = 9.8310e-02, PNorm = 82.7533, GNorm = 0.7519, lr_0 = 2.1751e-04
Loss = 8.7169e-02, PNorm = 82.7577, GNorm = 0.8239, lr_0 = 2.1736e-04
Loss = 8.0968e-02, PNorm = 82.7600, GNorm = 0.5817, lr_0 = 2.1721e-04
Loss = 8.9895e-02, PNorm = 82.7625, GNorm = 0.6045, lr_0 = 2.1706e-04
Loss = 9.3086e-02, PNorm = 82.7683, GNorm = 0.5190, lr_0 = 2.1691e-04
Loss = 8.5005e-02, PNorm = 82.7728, GNorm = 0.6221, lr_0 = 2.1676e-04
Loss = 1.0283e-01, PNorm = 82.7797, GNorm = 0.6860, lr_0 = 2.1661e-04
Loss = 8.8735e-02, PNorm = 82.7859, GNorm = 0.8750, lr_0 = 2.1646e-04
Loss = 8.7771e-02, PNorm = 82.7885, GNorm = 0.5255, lr_0 = 2.1632e-04
Loss = 8.6098e-02, PNorm = 82.7890, GNorm = 0.7260, lr_0 = 2.1617e-04
Loss = 9.2936e-02, PNorm = 82.7936, GNorm = 0.6232, lr_0 = 2.1602e-04
Loss = 9.5428e-02, PNorm = 82.7994, GNorm = 0.5250, lr_0 = 2.1587e-04
Loss = 7.4675e-02, PNorm = 82.8010, GNorm = 0.5589, lr_0 = 2.1572e-04
Loss = 9.6737e-02, PNorm = 82.8052, GNorm = 0.4850, lr_0 = 2.1558e-04
Loss = 9.1319e-02, PNorm = 82.8106, GNorm = 1.0416, lr_0 = 2.1543e-04
Loss = 9.2737e-02, PNorm = 82.8134, GNorm = 0.7532, lr_0 = 2.1528e-04
Loss = 9.1723e-02, PNorm = 82.8189, GNorm = 0.8509, lr_0 = 2.1513e-04
Loss = 9.2407e-02, PNorm = 82.8225, GNorm = 1.0361, lr_0 = 2.1499e-04
Loss = 8.9082e-02, PNorm = 82.8239, GNorm = 0.7625, lr_0 = 2.1484e-04
Loss = 9.1019e-02, PNorm = 82.8267, GNorm = 0.5961, lr_0 = 2.1469e-04
Loss = 9.0100e-02, PNorm = 82.8333, GNorm = 0.6651, lr_0 = 2.1454e-04
Loss = 9.7149e-02, PNorm = 82.8358, GNorm = 0.5350, lr_0 = 2.1440e-04
Loss = 9.7033e-02, PNorm = 82.8377, GNorm = 0.7190, lr_0 = 2.1425e-04
Loss = 8.4629e-02, PNorm = 82.8427, GNorm = 0.5750, lr_0 = 2.1410e-04
Loss = 9.8677e-02, PNorm = 82.8421, GNorm = 0.5515, lr_0 = 2.1396e-04
Loss = 8.2582e-02, PNorm = 82.8449, GNorm = 0.6678, lr_0 = 2.1381e-04
Loss = 8.7932e-02, PNorm = 82.8489, GNorm = 0.5953, lr_0 = 2.1366e-04
Loss = 9.6952e-02, PNorm = 82.8494, GNorm = 1.0357, lr_0 = 2.1352e-04
Loss = 9.7549e-02, PNorm = 82.8569, GNorm = 0.6159, lr_0 = 2.1337e-04
Loss = 9.3337e-02, PNorm = 82.8628, GNorm = 0.5816, lr_0 = 2.1323e-04
Loss = 9.3877e-02, PNorm = 82.8659, GNorm = 1.0484, lr_0 = 2.1308e-04
Loss = 1.0905e-01, PNorm = 82.8694, GNorm = 0.7053, lr_0 = 2.1293e-04
Loss = 9.8473e-02, PNorm = 82.8734, GNorm = 0.6406, lr_0 = 2.1279e-04
Loss = 8.8389e-02, PNorm = 82.8779, GNorm = 0.6357, lr_0 = 2.1264e-04
Loss = 9.4504e-02, PNorm = 82.8817, GNorm = 0.8121, lr_0 = 2.1250e-04
Loss = 9.2996e-02, PNorm = 82.8876, GNorm = 0.5710, lr_0 = 2.1235e-04
Loss = 9.9522e-02, PNorm = 82.8896, GNorm = 0.5806, lr_0 = 2.1221e-04
Loss = 9.6821e-02, PNorm = 82.8925, GNorm = 0.7932, lr_0 = 2.1206e-04
Loss = 1.0512e-01, PNorm = 82.8972, GNorm = 0.5633, lr_0 = 2.1191e-04
Loss = 9.0911e-02, PNorm = 82.9031, GNorm = 0.7902, lr_0 = 2.1177e-04
Loss = 9.2789e-02, PNorm = 82.9059, GNorm = 0.4974, lr_0 = 2.1162e-04
Loss = 9.5878e-02, PNorm = 82.9107, GNorm = 0.9654, lr_0 = 2.1148e-04
Loss = 8.4420e-02, PNorm = 82.9144, GNorm = 0.7185, lr_0 = 2.1133e-04
Loss = 9.2416e-02, PNorm = 82.9154, GNorm = 0.7699, lr_0 = 2.1119e-04
Loss = 8.2450e-02, PNorm = 82.9191, GNorm = 0.4537, lr_0 = 2.1104e-04
Loss = 8.0476e-02, PNorm = 82.9224, GNorm = 0.6132, lr_0 = 2.1090e-04
Loss = 9.6612e-02, PNorm = 82.9259, GNorm = 0.9108, lr_0 = 2.1076e-04
Loss = 9.3730e-02, PNorm = 82.9306, GNorm = 0.5608, lr_0 = 2.1061e-04
Loss = 1.0215e-01, PNorm = 82.9333, GNorm = 0.6920, lr_0 = 2.1047e-04
Loss = 7.7904e-02, PNorm = 82.9351, GNorm = 0.5604, lr_0 = 2.1032e-04
Loss = 9.2251e-02, PNorm = 82.9386, GNorm = 0.8095, lr_0 = 2.1018e-04
Loss = 1.1342e-01, PNorm = 82.9421, GNorm = 0.6775, lr_0 = 2.1003e-04
Loss = 8.7060e-02, PNorm = 82.9453, GNorm = 0.7241, lr_0 = 2.0989e-04
Loss = 9.0916e-02, PNorm = 82.9486, GNorm = 0.7483, lr_0 = 2.0975e-04
Loss = 8.6160e-02, PNorm = 82.9506, GNorm = 0.5445, lr_0 = 2.0960e-04
Validation mae = 0.225983
Epoch 21
Loss = 9.1450e-02, PNorm = 82.9555, GNorm = 0.5112, lr_0 = 2.0946e-04
Loss = 8.3232e-02, PNorm = 82.9590, GNorm = 0.5929, lr_0 = 2.0932e-04
Loss = 8.0157e-02, PNorm = 82.9607, GNorm = 0.6432, lr_0 = 2.0917e-04
Loss = 8.6927e-02, PNorm = 82.9643, GNorm = 0.7016, lr_0 = 2.0903e-04
Loss = 8.0000e-02, PNorm = 82.9682, GNorm = 0.6243, lr_0 = 2.0889e-04
Loss = 7.3977e-02, PNorm = 82.9746, GNorm = 0.5818, lr_0 = 2.0874e-04
Loss = 8.2511e-02, PNorm = 82.9787, GNorm = 0.7249, lr_0 = 2.0860e-04
Loss = 8.2851e-02, PNorm = 82.9811, GNorm = 0.5121, lr_0 = 2.0846e-04
Loss = 9.3857e-02, PNorm = 82.9846, GNorm = 0.8285, lr_0 = 2.0831e-04
Loss = 6.8283e-02, PNorm = 82.9867, GNorm = 0.6154, lr_0 = 2.0817e-04
Loss = 9.5456e-02, PNorm = 82.9874, GNorm = 0.6635, lr_0 = 2.0803e-04
Loss = 9.6373e-02, PNorm = 82.9925, GNorm = 0.5289, lr_0 = 2.0789e-04
Loss = 9.9415e-02, PNorm = 82.9943, GNorm = 1.0014, lr_0 = 2.0774e-04
Loss = 7.2279e-02, PNorm = 82.9999, GNorm = 0.7672, lr_0 = 2.0760e-04
Loss = 8.8969e-02, PNorm = 83.0048, GNorm = 0.6788, lr_0 = 2.0746e-04
Loss = 8.6901e-02, PNorm = 83.0093, GNorm = 0.7484, lr_0 = 2.0732e-04
Loss = 9.0715e-02, PNorm = 83.0112, GNorm = 0.7305, lr_0 = 2.0718e-04
Loss = 9.5177e-02, PNorm = 83.0149, GNorm = 0.5486, lr_0 = 2.0703e-04
Loss = 8.5555e-02, PNorm = 83.0177, GNorm = 0.6262, lr_0 = 2.0689e-04
Loss = 9.7623e-02, PNorm = 83.0207, GNorm = 0.6843, lr_0 = 2.0675e-04
Loss = 8.1415e-02, PNorm = 83.0266, GNorm = 0.6989, lr_0 = 2.0661e-04
Loss = 8.4525e-02, PNorm = 83.0306, GNorm = 0.7044, lr_0 = 2.0647e-04
Loss = 8.9611e-02, PNorm = 83.0331, GNorm = 0.6759, lr_0 = 2.0633e-04
Loss = 8.0324e-02, PNorm = 83.0375, GNorm = 0.6862, lr_0 = 2.0618e-04
Loss = 8.5137e-02, PNorm = 83.0408, GNorm = 0.5749, lr_0 = 2.0604e-04
Loss = 8.0051e-02, PNorm = 83.0438, GNorm = 0.6456, lr_0 = 2.0590e-04
Loss = 8.6599e-02, PNorm = 83.0480, GNorm = 0.5853, lr_0 = 2.0576e-04
Loss = 9.1005e-02, PNorm = 83.0537, GNorm = 0.5657, lr_0 = 2.0562e-04
Loss = 9.9902e-02, PNorm = 83.0583, GNorm = 0.5819, lr_0 = 2.0548e-04
Loss = 8.2253e-02, PNorm = 83.0613, GNorm = 0.7210, lr_0 = 2.0534e-04
Loss = 9.9643e-02, PNorm = 83.0652, GNorm = 0.8140, lr_0 = 2.0520e-04
Loss = 9.1839e-02, PNorm = 83.0694, GNorm = 0.6780, lr_0 = 2.0506e-04
Loss = 7.5455e-02, PNorm = 83.0728, GNorm = 0.6216, lr_0 = 2.0492e-04
Loss = 7.5501e-02, PNorm = 83.0728, GNorm = 0.5313, lr_0 = 2.0478e-04
Loss = 9.2403e-02, PNorm = 83.0749, GNorm = 0.8370, lr_0 = 2.0464e-04
Loss = 7.6197e-02, PNorm = 83.0748, GNorm = 0.5139, lr_0 = 2.0450e-04
Loss = 8.5209e-02, PNorm = 83.0787, GNorm = 0.6615, lr_0 = 2.0436e-04
Loss = 8.4714e-02, PNorm = 83.0852, GNorm = 1.2197, lr_0 = 2.0422e-04
Loss = 7.9490e-02, PNorm = 83.0878, GNorm = 0.7442, lr_0 = 2.0408e-04
Loss = 9.4161e-02, PNorm = 83.0925, GNorm = 0.8471, lr_0 = 2.0394e-04
Loss = 8.6303e-02, PNorm = 83.0963, GNorm = 0.4966, lr_0 = 2.0380e-04
Loss = 9.8606e-02, PNorm = 83.1004, GNorm = 0.6936, lr_0 = 2.0366e-04
Loss = 8.9018e-02, PNorm = 83.1058, GNorm = 0.5584, lr_0 = 2.0352e-04
Loss = 9.8357e-02, PNorm = 83.1076, GNorm = 0.5790, lr_0 = 2.0338e-04
Loss = 9.5543e-02, PNorm = 83.1098, GNorm = 0.5845, lr_0 = 2.0324e-04
Loss = 8.0442e-02, PNorm = 83.1112, GNorm = 0.5697, lr_0 = 2.0310e-04
Loss = 9.1300e-02, PNorm = 83.1149, GNorm = 0.6565, lr_0 = 2.0296e-04
Loss = 8.8460e-02, PNorm = 83.1195, GNorm = 0.5472, lr_0 = 2.0282e-04
Loss = 8.5621e-02, PNorm = 83.1213, GNorm = 0.7929, lr_0 = 2.0268e-04
Loss = 8.5580e-02, PNorm = 83.1231, GNorm = 0.5710, lr_0 = 2.0254e-04
Loss = 9.0893e-02, PNorm = 83.1277, GNorm = 0.8260, lr_0 = 2.0240e-04
Loss = 8.6699e-02, PNorm = 83.1333, GNorm = 0.6189, lr_0 = 2.0227e-04
Loss = 9.4817e-02, PNorm = 83.1373, GNorm = 0.7638, lr_0 = 2.0213e-04
Loss = 9.2266e-02, PNorm = 83.1411, GNorm = 0.9087, lr_0 = 2.0199e-04
Loss = 9.3923e-02, PNorm = 83.1441, GNorm = 0.7600, lr_0 = 2.0185e-04
Loss = 7.5199e-02, PNorm = 83.1444, GNorm = 0.6360, lr_0 = 2.0171e-04
Loss = 9.3008e-02, PNorm = 83.1479, GNorm = 0.6450, lr_0 = 2.0157e-04
Loss = 8.7934e-02, PNorm = 83.1521, GNorm = 0.5882, lr_0 = 2.0144e-04
Loss = 1.0339e-01, PNorm = 83.1574, GNorm = 0.7199, lr_0 = 2.0130e-04
Loss = 8.8175e-02, PNorm = 83.1629, GNorm = 0.7105, lr_0 = 2.0116e-04
Loss = 8.8695e-02, PNorm = 83.1710, GNorm = 0.7349, lr_0 = 2.0102e-04
Loss = 8.6973e-02, PNorm = 83.1741, GNorm = 0.5548, lr_0 = 2.0088e-04
Loss = 8.6143e-02, PNorm = 83.1762, GNorm = 0.6041, lr_0 = 2.0075e-04
Loss = 9.7853e-02, PNorm = 83.1811, GNorm = 0.7210, lr_0 = 2.0061e-04
Loss = 9.0109e-02, PNorm = 83.1861, GNorm = 0.8163, lr_0 = 2.0047e-04
Loss = 1.0180e-01, PNorm = 83.1897, GNorm = 0.9300, lr_0 = 2.0033e-04
Loss = 7.8008e-02, PNorm = 83.1936, GNorm = 0.6328, lr_0 = 2.0020e-04
Loss = 9.2491e-02, PNorm = 83.1990, GNorm = 0.7654, lr_0 = 2.0006e-04
Loss = 9.5683e-02, PNorm = 83.2071, GNorm = 0.6589, lr_0 = 1.9992e-04
Loss = 8.1421e-02, PNorm = 83.2131, GNorm = 0.6372, lr_0 = 1.9979e-04
Loss = 8.9383e-02, PNorm = 83.2166, GNorm = 0.5896, lr_0 = 1.9965e-04
Loss = 8.7098e-02, PNorm = 83.2199, GNorm = 0.5761, lr_0 = 1.9951e-04
Loss = 9.4890e-02, PNorm = 83.2226, GNorm = 0.6388, lr_0 = 1.9938e-04
Loss = 1.0308e-01, PNorm = 83.2248, GNorm = 0.6593, lr_0 = 1.9924e-04
Loss = 9.2607e-02, PNorm = 83.2310, GNorm = 0.4064, lr_0 = 1.9910e-04
Loss = 9.6767e-02, PNorm = 83.2381, GNorm = 0.5998, lr_0 = 1.9897e-04
Loss = 9.5243e-02, PNorm = 83.2430, GNorm = 0.8017, lr_0 = 1.9883e-04
Loss = 8.5401e-02, PNorm = 83.2461, GNorm = 0.7564, lr_0 = 1.9869e-04
Loss = 1.0685e-01, PNorm = 83.2499, GNorm = 0.6655, lr_0 = 1.9856e-04
Loss = 9.0047e-02, PNorm = 83.2531, GNorm = 0.5864, lr_0 = 1.9842e-04
Loss = 8.9423e-02, PNorm = 83.2590, GNorm = 0.4743, lr_0 = 1.9829e-04
Loss = 7.7752e-02, PNorm = 83.2646, GNorm = 0.7598, lr_0 = 1.9815e-04
Loss = 8.8620e-02, PNorm = 83.2664, GNorm = 0.5579, lr_0 = 1.9801e-04
Loss = 8.9179e-02, PNorm = 83.2695, GNorm = 0.7387, lr_0 = 1.9788e-04
Loss = 8.1377e-02, PNorm = 83.2761, GNorm = 0.6155, lr_0 = 1.9774e-04
Loss = 9.3637e-02, PNorm = 83.2812, GNorm = 0.6840, lr_0 = 1.9761e-04
Loss = 8.8940e-02, PNorm = 83.2844, GNorm = 0.5441, lr_0 = 1.9747e-04
Loss = 7.8758e-02, PNorm = 83.2898, GNorm = 0.5645, lr_0 = 1.9734e-04
Loss = 9.0918e-02, PNorm = 83.2925, GNorm = 0.5904, lr_0 = 1.9720e-04
Loss = 7.8299e-02, PNorm = 83.2964, GNorm = 0.5545, lr_0 = 1.9707e-04
Loss = 9.2326e-02, PNorm = 83.3006, GNorm = 0.6312, lr_0 = 1.9693e-04
Loss = 1.0164e-01, PNorm = 83.3037, GNorm = 0.6787, lr_0 = 1.9680e-04
Loss = 9.4171e-02, PNorm = 83.3090, GNorm = 0.7185, lr_0 = 1.9666e-04
Loss = 7.7675e-02, PNorm = 83.3109, GNorm = 0.6933, lr_0 = 1.9653e-04
Loss = 9.6959e-02, PNorm = 83.3118, GNorm = 1.0322, lr_0 = 1.9639e-04
Loss = 7.9574e-02, PNorm = 83.3149, GNorm = 0.6270, lr_0 = 1.9626e-04
Loss = 1.0073e-01, PNorm = 83.3177, GNorm = 0.7296, lr_0 = 1.9612e-04
Loss = 8.3968e-02, PNorm = 83.3192, GNorm = 0.6270, lr_0 = 1.9599e-04
Loss = 8.5787e-02, PNorm = 83.3236, GNorm = 0.6500, lr_0 = 1.9585e-04
Loss = 9.1124e-02, PNorm = 83.3292, GNorm = 0.7090, lr_0 = 1.9572e-04
Loss = 9.6807e-02, PNorm = 83.3325, GNorm = 0.8093, lr_0 = 1.9559e-04
Loss = 8.3418e-02, PNorm = 83.3348, GNorm = 0.7478, lr_0 = 1.9545e-04
Loss = 8.9550e-02, PNorm = 83.3402, GNorm = 0.7551, lr_0 = 1.9532e-04
Loss = 9.3661e-02, PNorm = 83.3412, GNorm = 0.6791, lr_0 = 1.9518e-04
Loss = 8.4236e-02, PNorm = 83.3432, GNorm = 0.4974, lr_0 = 1.9505e-04
Loss = 9.8595e-02, PNorm = 83.3455, GNorm = 0.7043, lr_0 = 1.9492e-04
Loss = 9.5786e-02, PNorm = 83.3490, GNorm = 0.8153, lr_0 = 1.9478e-04
Loss = 9.9636e-02, PNorm = 83.3498, GNorm = 0.6728, lr_0 = 1.9465e-04
Loss = 9.2593e-02, PNorm = 83.3543, GNorm = 0.5420, lr_0 = 1.9452e-04
Loss = 9.2294e-02, PNorm = 83.3601, GNorm = 0.6062, lr_0 = 1.9438e-04
Loss = 9.7965e-02, PNorm = 83.3626, GNorm = 0.7838, lr_0 = 1.9425e-04
Loss = 1.0201e-01, PNorm = 83.3649, GNorm = 0.5865, lr_0 = 1.9412e-04
Loss = 9.8687e-02, PNorm = 83.3690, GNorm = 0.9516, lr_0 = 1.9398e-04
Loss = 9.2488e-02, PNorm = 83.3707, GNorm = 0.6275, lr_0 = 1.9385e-04
Loss = 9.2750e-02, PNorm = 83.3712, GNorm = 0.7136, lr_0 = 1.9372e-04
Loss = 8.4182e-02, PNorm = 83.3722, GNorm = 0.6331, lr_0 = 1.9359e-04
Loss = 8.3668e-02, PNorm = 83.3734, GNorm = 0.6095, lr_0 = 1.9345e-04
Loss = 9.1149e-02, PNorm = 83.3750, GNorm = 0.7731, lr_0 = 1.9332e-04
Loss = 8.4512e-02, PNorm = 83.3788, GNorm = 0.7010, lr_0 = 1.9319e-04
Loss = 9.0973e-02, PNorm = 83.3839, GNorm = 0.6421, lr_0 = 1.9306e-04
Validation mae = 0.226931
Epoch 22
Loss = 7.9779e-02, PNorm = 83.3874, GNorm = 0.4288, lr_0 = 1.9292e-04
Loss = 8.2350e-02, PNorm = 83.3921, GNorm = 0.8911, lr_0 = 1.9279e-04
Loss = 8.1115e-02, PNorm = 83.3971, GNorm = 0.4898, lr_0 = 1.9266e-04
Loss = 9.0423e-02, PNorm = 83.4007, GNorm = 0.6340, lr_0 = 1.9253e-04
Loss = 8.8107e-02, PNorm = 83.4004, GNorm = 0.6385, lr_0 = 1.9240e-04
Loss = 8.1736e-02, PNorm = 83.4018, GNorm = 0.6994, lr_0 = 1.9226e-04
Loss = 7.8790e-02, PNorm = 83.4054, GNorm = 0.6329, lr_0 = 1.9213e-04
Loss = 9.4091e-02, PNorm = 83.4090, GNorm = 0.6421, lr_0 = 1.9200e-04
Loss = 9.1602e-02, PNorm = 83.4147, GNorm = 0.4980, lr_0 = 1.9187e-04
Loss = 8.4946e-02, PNorm = 83.4211, GNorm = 0.9498, lr_0 = 1.9174e-04
Loss = 8.0263e-02, PNorm = 83.4275, GNorm = 0.5370, lr_0 = 1.9161e-04
Loss = 9.8176e-02, PNorm = 83.4313, GNorm = 0.5529, lr_0 = 1.9148e-04
Loss = 9.3590e-02, PNorm = 83.4351, GNorm = 0.7385, lr_0 = 1.9134e-04
Loss = 7.5238e-02, PNorm = 83.4377, GNorm = 0.5298, lr_0 = 1.9121e-04
Loss = 8.6343e-02, PNorm = 83.4387, GNorm = 1.0613, lr_0 = 1.9108e-04
Loss = 9.1228e-02, PNorm = 83.4429, GNorm = 0.7766, lr_0 = 1.9095e-04
Loss = 7.6622e-02, PNorm = 83.4459, GNorm = 0.6446, lr_0 = 1.9082e-04
Loss = 8.2667e-02, PNorm = 83.4489, GNorm = 0.9016, lr_0 = 1.9069e-04
Loss = 8.4073e-02, PNorm = 83.4526, GNorm = 0.5594, lr_0 = 1.9056e-04
Loss = 8.9356e-02, PNorm = 83.4574, GNorm = 0.6333, lr_0 = 1.9043e-04
Loss = 8.9099e-02, PNorm = 83.4622, GNorm = 0.6047, lr_0 = 1.9030e-04
Loss = 7.0466e-02, PNorm = 83.4651, GNorm = 0.8678, lr_0 = 1.9017e-04
Loss = 8.7442e-02, PNorm = 83.4682, GNorm = 0.5187, lr_0 = 1.9004e-04
Loss = 8.8883e-02, PNorm = 83.4711, GNorm = 0.6994, lr_0 = 1.8991e-04
Loss = 1.0005e-01, PNorm = 83.4735, GNorm = 0.6658, lr_0 = 1.8978e-04
Loss = 7.5754e-02, PNorm = 83.4769, GNorm = 0.7507, lr_0 = 1.8965e-04
Loss = 8.3054e-02, PNorm = 83.4801, GNorm = 0.5443, lr_0 = 1.8952e-04
Loss = 8.1902e-02, PNorm = 83.4839, GNorm = 0.5115, lr_0 = 1.8939e-04
Loss = 8.0883e-02, PNorm = 83.4874, GNorm = 0.5040, lr_0 = 1.8926e-04
Loss = 9.2878e-02, PNorm = 83.4908, GNorm = 0.5889, lr_0 = 1.8913e-04
Loss = 8.8540e-02, PNorm = 83.4928, GNorm = 0.7161, lr_0 = 1.8900e-04
Loss = 8.7818e-02, PNorm = 83.4945, GNorm = 0.6737, lr_0 = 1.8887e-04
Loss = 7.9528e-02, PNorm = 83.4967, GNorm = 0.5155, lr_0 = 1.8874e-04
Loss = 8.0356e-02, PNorm = 83.4999, GNorm = 0.4643, lr_0 = 1.8861e-04
Loss = 8.8172e-02, PNorm = 83.5047, GNorm = 0.6218, lr_0 = 1.8848e-04
Loss = 8.5568e-02, PNorm = 83.5095, GNorm = 0.4224, lr_0 = 1.8835e-04
Loss = 1.0097e-01, PNorm = 83.5150, GNorm = 0.6176, lr_0 = 1.8822e-04
Loss = 7.6056e-02, PNorm = 83.5183, GNorm = 0.6079, lr_0 = 1.8809e-04
Loss = 8.2411e-02, PNorm = 83.5197, GNorm = 0.6544, lr_0 = 1.8797e-04
Loss = 8.4306e-02, PNorm = 83.5242, GNorm = 0.6701, lr_0 = 1.8784e-04
Loss = 8.3494e-02, PNorm = 83.5279, GNorm = 0.7777, lr_0 = 1.8771e-04
Loss = 8.7454e-02, PNorm = 83.5311, GNorm = 0.6215, lr_0 = 1.8758e-04
Loss = 9.0359e-02, PNorm = 83.5352, GNorm = 1.0396, lr_0 = 1.8745e-04
Loss = 8.0373e-02, PNorm = 83.5402, GNorm = 0.6697, lr_0 = 1.8732e-04
Loss = 9.1198e-02, PNorm = 83.5416, GNorm = 0.6404, lr_0 = 1.8719e-04
Loss = 1.0498e-01, PNorm = 83.5407, GNorm = 0.6274, lr_0 = 1.8707e-04
Loss = 7.8562e-02, PNorm = 83.5424, GNorm = 0.5409, lr_0 = 1.8694e-04
Loss = 8.8635e-02, PNorm = 83.5485, GNorm = 0.4790, lr_0 = 1.8681e-04
Loss = 8.1935e-02, PNorm = 83.5557, GNorm = 0.7701, lr_0 = 1.8668e-04
Loss = 8.4664e-02, PNorm = 83.5610, GNorm = 0.7581, lr_0 = 1.8655e-04
Loss = 9.6214e-02, PNorm = 83.5647, GNorm = 0.6834, lr_0 = 1.8643e-04
Loss = 8.1978e-02, PNorm = 83.5681, GNorm = 0.6715, lr_0 = 1.8630e-04
Loss = 8.0226e-02, PNorm = 83.5693, GNorm = 0.4671, lr_0 = 1.8617e-04
Loss = 7.8813e-02, PNorm = 83.5711, GNorm = 0.6960, lr_0 = 1.8604e-04
Loss = 1.0101e-01, PNorm = 83.5755, GNorm = 0.8114, lr_0 = 1.8592e-04
Loss = 8.0207e-02, PNorm = 83.5771, GNorm = 0.7227, lr_0 = 1.8579e-04
Loss = 8.7541e-02, PNorm = 83.5796, GNorm = 0.6358, lr_0 = 1.8566e-04
Loss = 7.9787e-02, PNorm = 83.5846, GNorm = 0.5035, lr_0 = 1.8553e-04
Loss = 8.9957e-02, PNorm = 83.5876, GNorm = 0.7315, lr_0 = 1.8541e-04
Loss = 9.4656e-02, PNorm = 83.5895, GNorm = 0.8032, lr_0 = 1.8528e-04
Loss = 8.0142e-02, PNorm = 83.5925, GNorm = 0.7025, lr_0 = 1.8515e-04
Loss = 8.6487e-02, PNorm = 83.5931, GNorm = 0.6336, lr_0 = 1.8503e-04
Loss = 8.5289e-02, PNorm = 83.5945, GNorm = 0.7443, lr_0 = 1.8490e-04
Loss = 8.3344e-02, PNorm = 83.5957, GNorm = 0.5658, lr_0 = 1.8477e-04
Loss = 9.5626e-02, PNorm = 83.5977, GNorm = 0.6619, lr_0 = 1.8465e-04
Loss = 9.8376e-02, PNorm = 83.6023, GNorm = 0.9704, lr_0 = 1.8452e-04
Loss = 7.9448e-02, PNorm = 83.6052, GNorm = 0.6129, lr_0 = 1.8439e-04
Loss = 7.8000e-02, PNorm = 83.6072, GNorm = 0.5688, lr_0 = 1.8427e-04
Loss = 9.4868e-02, PNorm = 83.6123, GNorm = 0.5643, lr_0 = 1.8414e-04
Loss = 9.1663e-02, PNorm = 83.6143, GNorm = 0.6241, lr_0 = 1.8401e-04
Loss = 8.8723e-02, PNorm = 83.6168, GNorm = 0.5173, lr_0 = 1.8389e-04
Loss = 9.3688e-02, PNorm = 83.6203, GNorm = 0.7060, lr_0 = 1.8376e-04
Loss = 9.0102e-02, PNorm = 83.6259, GNorm = 0.7340, lr_0 = 1.8364e-04
Loss = 9.0164e-02, PNorm = 83.6301, GNorm = 0.5946, lr_0 = 1.8351e-04
Loss = 7.6602e-02, PNorm = 83.6347, GNorm = 0.5102, lr_0 = 1.8338e-04
Loss = 7.9423e-02, PNorm = 83.6399, GNorm = 0.5008, lr_0 = 1.8326e-04
Loss = 7.8406e-02, PNorm = 83.6430, GNorm = 0.6314, lr_0 = 1.8313e-04
Loss = 8.1973e-02, PNorm = 83.6449, GNorm = 0.4910, lr_0 = 1.8301e-04
Loss = 8.0307e-02, PNorm = 83.6456, GNorm = 0.8263, lr_0 = 1.8288e-04
Loss = 8.3461e-02, PNorm = 83.6472, GNorm = 0.7647, lr_0 = 1.8276e-04
Loss = 9.0303e-02, PNorm = 83.6467, GNorm = 0.6125, lr_0 = 1.8263e-04
Loss = 8.1727e-02, PNorm = 83.6489, GNorm = 0.4839, lr_0 = 1.8251e-04
Loss = 8.5076e-02, PNorm = 83.6510, GNorm = 0.6190, lr_0 = 1.8238e-04
Loss = 7.3668e-02, PNorm = 83.6567, GNorm = 0.6646, lr_0 = 1.8226e-04
Loss = 9.3245e-02, PNorm = 83.6614, GNorm = 0.4876, lr_0 = 1.8213e-04
Loss = 8.5723e-02, PNorm = 83.6643, GNorm = 0.6006, lr_0 = 1.8201e-04
Loss = 8.2743e-02, PNorm = 83.6679, GNorm = 0.5998, lr_0 = 1.8188e-04
Loss = 1.0113e-01, PNorm = 83.6716, GNorm = 0.6437, lr_0 = 1.8176e-04
Loss = 8.4935e-02, PNorm = 83.6740, GNorm = 0.6438, lr_0 = 1.8163e-04
Loss = 8.7786e-02, PNorm = 83.6753, GNorm = 0.6288, lr_0 = 1.8151e-04
Loss = 9.1922e-02, PNorm = 83.6755, GNorm = 0.6097, lr_0 = 1.8138e-04
Loss = 8.8381e-02, PNorm = 83.6789, GNorm = 0.7082, lr_0 = 1.8126e-04
Loss = 9.6949e-02, PNorm = 83.6823, GNorm = 0.6780, lr_0 = 1.8114e-04
Loss = 9.4519e-02, PNorm = 83.6831, GNorm = 0.8782, lr_0 = 1.8101e-04
Loss = 9.2147e-02, PNorm = 83.6859, GNorm = 0.7639, lr_0 = 1.8089e-04
Loss = 8.7891e-02, PNorm = 83.6899, GNorm = 0.5408, lr_0 = 1.8076e-04
Loss = 7.5264e-02, PNorm = 83.6914, GNorm = 0.6573, lr_0 = 1.8064e-04
Loss = 9.5527e-02, PNorm = 83.6896, GNorm = 0.7114, lr_0 = 1.8052e-04
Loss = 8.4771e-02, PNorm = 83.6941, GNorm = 0.6535, lr_0 = 1.8039e-04
Loss = 8.4234e-02, PNorm = 83.7003, GNorm = 1.1771, lr_0 = 1.8027e-04
Loss = 8.5398e-02, PNorm = 83.7042, GNorm = 0.7096, lr_0 = 1.8015e-04
Loss = 8.4274e-02, PNorm = 83.7068, GNorm = 0.5454, lr_0 = 1.8002e-04
Loss = 7.3968e-02, PNorm = 83.7086, GNorm = 0.5580, lr_0 = 1.7990e-04
Loss = 9.0975e-02, PNorm = 83.7130, GNorm = 1.1221, lr_0 = 1.7978e-04
Loss = 1.1284e-01, PNorm = 83.7177, GNorm = 0.8357, lr_0 = 1.7965e-04
Loss = 8.5698e-02, PNorm = 83.7214, GNorm = 0.4607, lr_0 = 1.7953e-04
Loss = 9.2375e-02, PNorm = 83.7221, GNorm = 0.5116, lr_0 = 1.7941e-04
Loss = 8.6511e-02, PNorm = 83.7228, GNorm = 0.6840, lr_0 = 1.7928e-04
Loss = 9.1156e-02, PNorm = 83.7246, GNorm = 0.7302, lr_0 = 1.7916e-04
Loss = 9.7668e-02, PNorm = 83.7282, GNorm = 0.6373, lr_0 = 1.7904e-04
Loss = 9.6369e-02, PNorm = 83.7312, GNorm = 0.8689, lr_0 = 1.7892e-04
Loss = 8.3337e-02, PNorm = 83.7357, GNorm = 0.6608, lr_0 = 1.7879e-04
Loss = 8.4628e-02, PNorm = 83.7388, GNorm = 0.6670, lr_0 = 1.7867e-04
Loss = 7.9190e-02, PNorm = 83.7405, GNorm = 0.6401, lr_0 = 1.7855e-04
Loss = 8.8768e-02, PNorm = 83.7437, GNorm = 0.5739, lr_0 = 1.7843e-04
Loss = 9.1601e-02, PNorm = 83.7473, GNorm = 0.7044, lr_0 = 1.7830e-04
Loss = 8.2993e-02, PNorm = 83.7503, GNorm = 0.7728, lr_0 = 1.7818e-04
Loss = 9.1448e-02, PNorm = 83.7531, GNorm = 0.7754, lr_0 = 1.7806e-04
Loss = 7.9440e-02, PNorm = 83.7547, GNorm = 0.5719, lr_0 = 1.7794e-04
Loss = 8.3656e-02, PNorm = 83.7594, GNorm = 0.6855, lr_0 = 1.7782e-04
Validation mae = 0.228968
Epoch 23
Loss = 7.6754e-02, PNorm = 83.7623, GNorm = 0.9158, lr_0 = 1.7769e-04
Loss = 8.0140e-02, PNorm = 83.7661, GNorm = 0.6515, lr_0 = 1.7757e-04
Loss = 8.9518e-02, PNorm = 83.7673, GNorm = 0.6242, lr_0 = 1.7745e-04
Loss = 8.3935e-02, PNorm = 83.7677, GNorm = 0.4835, lr_0 = 1.7733e-04
Loss = 7.5717e-02, PNorm = 83.7716, GNorm = 0.6294, lr_0 = 1.7721e-04
Loss = 7.8328e-02, PNorm = 83.7766, GNorm = 0.6648, lr_0 = 1.7709e-04
Loss = 7.4948e-02, PNorm = 83.7807, GNorm = 0.6744, lr_0 = 1.7696e-04
Loss = 8.3198e-02, PNorm = 83.7830, GNorm = 0.5748, lr_0 = 1.7684e-04
Loss = 8.5075e-02, PNorm = 83.7869, GNorm = 0.5151, lr_0 = 1.7672e-04
Loss = 8.5009e-02, PNorm = 83.7922, GNorm = 0.6261, lr_0 = 1.7660e-04
Loss = 8.9927e-02, PNorm = 83.7932, GNorm = 0.5301, lr_0 = 1.7648e-04
Loss = 7.8796e-02, PNorm = 83.7944, GNorm = 0.6564, lr_0 = 1.7636e-04
Loss = 8.9059e-02, PNorm = 83.7969, GNorm = 0.7210, lr_0 = 1.7624e-04
Loss = 8.4578e-02, PNorm = 83.8002, GNorm = 0.6964, lr_0 = 1.7612e-04
Loss = 8.0126e-02, PNorm = 83.8007, GNorm = 0.5963, lr_0 = 1.7600e-04
Loss = 7.6811e-02, PNorm = 83.8038, GNorm = 0.5455, lr_0 = 1.7588e-04
Loss = 9.8188e-02, PNorm = 83.8087, GNorm = 0.7043, lr_0 = 1.7576e-04
Loss = 8.1747e-02, PNorm = 83.8106, GNorm = 0.7000, lr_0 = 1.7564e-04
Loss = 7.0917e-02, PNorm = 83.8127, GNorm = 0.7265, lr_0 = 1.7552e-04
Loss = 7.2657e-02, PNorm = 83.8152, GNorm = 0.6293, lr_0 = 1.7540e-04
Loss = 6.3929e-02, PNorm = 83.8178, GNorm = 0.5266, lr_0 = 1.7528e-04
Loss = 7.0793e-02, PNorm = 83.8211, GNorm = 0.4936, lr_0 = 1.7516e-04
Loss = 7.9017e-02, PNorm = 83.8255, GNorm = 0.8275, lr_0 = 1.7504e-04
Loss = 7.2136e-02, PNorm = 83.8280, GNorm = 0.6902, lr_0 = 1.7492e-04
Loss = 8.5394e-02, PNorm = 83.8319, GNorm = 0.6834, lr_0 = 1.7480e-04
Loss = 7.2408e-02, PNorm = 83.8340, GNorm = 0.7053, lr_0 = 1.7468e-04
Loss = 8.5711e-02, PNorm = 83.8374, GNorm = 0.7209, lr_0 = 1.7456e-04
Loss = 8.7502e-02, PNorm = 83.8426, GNorm = 0.5755, lr_0 = 1.7444e-04
Loss = 9.2806e-02, PNorm = 83.8470, GNorm = 0.5290, lr_0 = 1.7432e-04
Loss = 7.5373e-02, PNorm = 83.8488, GNorm = 0.7796, lr_0 = 1.7420e-04
Loss = 8.6718e-02, PNorm = 83.8508, GNorm = 0.6496, lr_0 = 1.7408e-04
Loss = 8.5763e-02, PNorm = 83.8546, GNorm = 0.6866, lr_0 = 1.7396e-04
Loss = 8.6262e-02, PNorm = 83.8576, GNorm = 0.7634, lr_0 = 1.7384e-04
Loss = 9.2849e-02, PNorm = 83.8615, GNorm = 0.6843, lr_0 = 1.7372e-04
Loss = 8.8274e-02, PNorm = 83.8653, GNorm = 0.9527, lr_0 = 1.7360e-04
Loss = 8.2709e-02, PNorm = 83.8686, GNorm = 0.5969, lr_0 = 1.7348e-04
Loss = 8.3362e-02, PNorm = 83.8725, GNorm = 0.6143, lr_0 = 1.7336e-04
Loss = 7.7063e-02, PNorm = 83.8764, GNorm = 0.6941, lr_0 = 1.7325e-04
Loss = 7.9724e-02, PNorm = 83.8780, GNorm = 0.5576, lr_0 = 1.7313e-04
Loss = 8.6708e-02, PNorm = 83.8785, GNorm = 0.9141, lr_0 = 1.7301e-04
Loss = 8.2707e-02, PNorm = 83.8829, GNorm = 0.6833, lr_0 = 1.7289e-04
Loss = 9.3368e-02, PNorm = 83.8896, GNorm = 0.7062, lr_0 = 1.7277e-04
Loss = 9.4537e-02, PNorm = 83.8919, GNorm = 0.5203, lr_0 = 1.7265e-04
Loss = 7.7814e-02, PNorm = 83.8946, GNorm = 0.6521, lr_0 = 1.7253e-04
Loss = 8.4309e-02, PNorm = 83.8971, GNorm = 0.8592, lr_0 = 1.7242e-04
Loss = 8.4462e-02, PNorm = 83.8987, GNorm = 0.5666, lr_0 = 1.7230e-04
Loss = 8.6898e-02, PNorm = 83.8991, GNorm = 0.5796, lr_0 = 1.7218e-04
Loss = 7.6933e-02, PNorm = 83.9021, GNorm = 0.5701, lr_0 = 1.7206e-04
Loss = 9.1639e-02, PNorm = 83.9069, GNorm = 0.5810, lr_0 = 1.7194e-04
Loss = 7.9515e-02, PNorm = 83.9105, GNorm = 0.4537, lr_0 = 1.7183e-04
Loss = 8.3068e-02, PNorm = 83.9122, GNorm = 0.6210, lr_0 = 1.7171e-04
Loss = 9.3361e-02, PNorm = 83.9155, GNorm = 0.9625, lr_0 = 1.7159e-04
Loss = 9.3299e-02, PNorm = 83.9179, GNorm = 0.5300, lr_0 = 1.7147e-04
Loss = 8.3529e-02, PNorm = 83.9205, GNorm = 0.4793, lr_0 = 1.7136e-04
Loss = 9.2773e-02, PNorm = 83.9251, GNorm = 0.6268, lr_0 = 1.7124e-04
Loss = 8.3760e-02, PNorm = 83.9272, GNorm = 0.8628, lr_0 = 1.7112e-04
Loss = 8.2687e-02, PNorm = 83.9298, GNorm = 0.5399, lr_0 = 1.7100e-04
Loss = 9.3004e-02, PNorm = 83.9330, GNorm = 0.6613, lr_0 = 1.7089e-04
Loss = 8.2571e-02, PNorm = 83.9357, GNorm = 0.6589, lr_0 = 1.7077e-04
Loss = 6.9173e-02, PNorm = 83.9384, GNorm = 0.7920, lr_0 = 1.7065e-04
Loss = 8.2376e-02, PNorm = 83.9409, GNorm = 0.6293, lr_0 = 1.7054e-04
Loss = 8.1253e-02, PNorm = 83.9444, GNorm = 0.5507, lr_0 = 1.7042e-04
Loss = 7.6138e-02, PNorm = 83.9486, GNorm = 0.6610, lr_0 = 1.7030e-04
Loss = 8.0828e-02, PNorm = 83.9517, GNorm = 0.7230, lr_0 = 1.7019e-04
Loss = 8.9888e-02, PNorm = 83.9540, GNorm = 0.8951, lr_0 = 1.7007e-04
Loss = 8.7729e-02, PNorm = 83.9571, GNorm = 0.5147, lr_0 = 1.6995e-04
Loss = 8.4401e-02, PNorm = 83.9610, GNorm = 0.6520, lr_0 = 1.6984e-04
Loss = 7.7349e-02, PNorm = 83.9632, GNorm = 0.6603, lr_0 = 1.6972e-04
Loss = 7.4779e-02, PNorm = 83.9626, GNorm = 0.5992, lr_0 = 1.6960e-04
Loss = 7.8326e-02, PNorm = 83.9638, GNorm = 1.0233, lr_0 = 1.6949e-04
Loss = 8.8770e-02, PNorm = 83.9649, GNorm = 0.9945, lr_0 = 1.6937e-04
Loss = 9.4948e-02, PNorm = 83.9668, GNorm = 0.5458, lr_0 = 1.6926e-04
Loss = 8.8228e-02, PNorm = 83.9695, GNorm = 0.6127, lr_0 = 1.6914e-04
Loss = 9.1232e-02, PNorm = 83.9719, GNorm = 0.7319, lr_0 = 1.6902e-04
Loss = 8.2420e-02, PNorm = 83.9743, GNorm = 0.5593, lr_0 = 1.6891e-04
Loss = 8.6375e-02, PNorm = 83.9749, GNorm = 0.6858, lr_0 = 1.6879e-04
Loss = 8.3851e-02, PNorm = 83.9770, GNorm = 0.7217, lr_0 = 1.6868e-04
Loss = 9.2337e-02, PNorm = 83.9792, GNorm = 0.8298, lr_0 = 1.6856e-04
Loss = 8.0329e-02, PNorm = 83.9828, GNorm = 0.9581, lr_0 = 1.6845e-04
Loss = 8.6886e-02, PNorm = 83.9851, GNorm = 0.7189, lr_0 = 1.6833e-04
Loss = 9.0385e-02, PNorm = 83.9866, GNorm = 0.5136, lr_0 = 1.6821e-04
Loss = 9.0560e-02, PNorm = 83.9892, GNorm = 0.6021, lr_0 = 1.6810e-04
Loss = 9.5111e-02, PNorm = 83.9919, GNorm = 0.6011, lr_0 = 1.6798e-04
Loss = 8.7925e-02, PNorm = 83.9935, GNorm = 0.7442, lr_0 = 1.6787e-04
Loss = 9.1607e-02, PNorm = 83.9980, GNorm = 0.6423, lr_0 = 1.6775e-04
Loss = 8.3842e-02, PNorm = 84.0017, GNorm = 0.6793, lr_0 = 1.6764e-04
Loss = 8.7511e-02, PNorm = 84.0035, GNorm = 0.7781, lr_0 = 1.6752e-04
Loss = 7.4988e-02, PNorm = 84.0082, GNorm = 0.8969, lr_0 = 1.6741e-04
Loss = 8.5043e-02, PNorm = 84.0117, GNorm = 0.6825, lr_0 = 1.6729e-04
Loss = 8.3264e-02, PNorm = 84.0126, GNorm = 0.7070, lr_0 = 1.6718e-04
Loss = 8.1643e-02, PNorm = 84.0138, GNorm = 0.5537, lr_0 = 1.6707e-04
Loss = 8.3204e-02, PNorm = 84.0157, GNorm = 0.7783, lr_0 = 1.6695e-04
Loss = 8.5278e-02, PNorm = 84.0190, GNorm = 0.9357, lr_0 = 1.6684e-04
Loss = 8.2077e-02, PNorm = 84.0264, GNorm = 0.5426, lr_0 = 1.6672e-04
Loss = 9.2085e-02, PNorm = 84.0315, GNorm = 0.8479, lr_0 = 1.6661e-04
Loss = 7.9639e-02, PNorm = 84.0366, GNorm = 0.7201, lr_0 = 1.6649e-04
Loss = 8.6892e-02, PNorm = 84.0382, GNorm = 0.8298, lr_0 = 1.6638e-04
Loss = 1.0780e-01, PNorm = 84.0400, GNorm = 0.8041, lr_0 = 1.6627e-04
Loss = 9.0365e-02, PNorm = 84.0433, GNorm = 0.6381, lr_0 = 1.6615e-04
Loss = 8.9800e-02, PNorm = 84.0474, GNorm = 0.5618, lr_0 = 1.6604e-04
Loss = 9.5023e-02, PNorm = 84.0495, GNorm = 0.7896, lr_0 = 1.6592e-04
Loss = 8.6318e-02, PNorm = 84.0519, GNorm = 0.5761, lr_0 = 1.6581e-04
Loss = 7.6087e-02, PNorm = 84.0554, GNorm = 0.6184, lr_0 = 1.6570e-04
Loss = 8.8927e-02, PNorm = 84.0571, GNorm = 0.6165, lr_0 = 1.6558e-04
Loss = 6.9568e-02, PNorm = 84.0563, GNorm = 0.6702, lr_0 = 1.6547e-04
Loss = 8.4607e-02, PNorm = 84.0579, GNorm = 0.6162, lr_0 = 1.6536e-04
Loss = 8.1138e-02, PNorm = 84.0595, GNorm = 0.5828, lr_0 = 1.6524e-04
Loss = 8.2586e-02, PNorm = 84.0619, GNorm = 0.5745, lr_0 = 1.6513e-04
Loss = 8.3256e-02, PNorm = 84.0669, GNorm = 0.7108, lr_0 = 1.6502e-04
Loss = 9.0373e-02, PNorm = 84.0713, GNorm = 0.7192, lr_0 = 1.6490e-04
Loss = 8.4067e-02, PNorm = 84.0735, GNorm = 0.7634, lr_0 = 1.6479e-04
Loss = 8.6051e-02, PNorm = 84.0752, GNorm = 0.6184, lr_0 = 1.6468e-04
Loss = 8.2209e-02, PNorm = 84.0769, GNorm = 0.5918, lr_0 = 1.6457e-04
Loss = 8.1050e-02, PNorm = 84.0797, GNorm = 0.6372, lr_0 = 1.6445e-04
Loss = 8.0685e-02, PNorm = 84.0824, GNorm = 0.6632, lr_0 = 1.6434e-04
Loss = 9.2446e-02, PNorm = 84.0839, GNorm = 0.5726, lr_0 = 1.6423e-04
Loss = 8.6634e-02, PNorm = 84.0878, GNorm = 0.5751, lr_0 = 1.6412e-04
Loss = 9.2581e-02, PNorm = 84.0913, GNorm = 0.6661, lr_0 = 1.6400e-04
Loss = 8.6236e-02, PNorm = 84.0929, GNorm = 0.6980, lr_0 = 1.6389e-04
Loss = 8.3982e-02, PNorm = 84.0943, GNorm = 0.6363, lr_0 = 1.6378e-04
Validation mae = 0.228016
Epoch 24
Loss = 8.6328e-02, PNorm = 84.0950, GNorm = 0.6447, lr_0 = 1.6367e-04
Loss = 7.9036e-02, PNorm = 84.0958, GNorm = 0.4812, lr_0 = 1.6355e-04
Loss = 7.7660e-02, PNorm = 84.0992, GNorm = 0.6122, lr_0 = 1.6344e-04
Loss = 7.9304e-02, PNorm = 84.1020, GNorm = 0.5113, lr_0 = 1.6333e-04
Loss = 8.9892e-02, PNorm = 84.1069, GNorm = 0.7112, lr_0 = 1.6322e-04
Loss = 7.2608e-02, PNorm = 84.1099, GNorm = 0.5650, lr_0 = 1.6311e-04
Loss = 8.0412e-02, PNorm = 84.1122, GNorm = 0.5254, lr_0 = 1.6299e-04
Loss = 7.3628e-02, PNorm = 84.1153, GNorm = 0.7388, lr_0 = 1.6288e-04
Loss = 8.4722e-02, PNorm = 84.1174, GNorm = 0.5161, lr_0 = 1.6277e-04
Loss = 7.7653e-02, PNorm = 84.1186, GNorm = 0.7597, lr_0 = 1.6266e-04
Loss = 7.7902e-02, PNorm = 84.1208, GNorm = 0.5697, lr_0 = 1.6255e-04
Loss = 7.6236e-02, PNorm = 84.1242, GNorm = 0.5977, lr_0 = 1.6244e-04
Loss = 7.6431e-02, PNorm = 84.1261, GNorm = 0.5200, lr_0 = 1.6233e-04
Loss = 8.8789e-02, PNorm = 84.1303, GNorm = 0.5896, lr_0 = 1.6221e-04
Loss = 8.4413e-02, PNorm = 84.1337, GNorm = 0.5648, lr_0 = 1.6210e-04
Loss = 8.3216e-02, PNorm = 84.1364, GNorm = 0.6233, lr_0 = 1.6199e-04
Loss = 7.5855e-02, PNorm = 84.1390, GNorm = 0.5931, lr_0 = 1.6188e-04
Loss = 6.8131e-02, PNorm = 84.1421, GNorm = 0.5343, lr_0 = 1.6177e-04
Loss = 9.1591e-02, PNorm = 84.1432, GNorm = 0.6398, lr_0 = 1.6166e-04
Loss = 8.2541e-02, PNorm = 84.1454, GNorm = 0.6332, lr_0 = 1.6155e-04
Loss = 7.3731e-02, PNorm = 84.1500, GNorm = 0.6786, lr_0 = 1.6144e-04
Loss = 7.5549e-02, PNorm = 84.1530, GNorm = 0.5855, lr_0 = 1.6133e-04
Loss = 6.7296e-02, PNorm = 84.1556, GNorm = 0.6881, lr_0 = 1.6122e-04
Loss = 8.6684e-02, PNorm = 84.1562, GNorm = 0.6290, lr_0 = 1.6111e-04
Loss = 7.0134e-02, PNorm = 84.1565, GNorm = 0.4193, lr_0 = 1.6100e-04
Loss = 7.8916e-02, PNorm = 84.1580, GNorm = 0.6913, lr_0 = 1.6089e-04
Loss = 7.7262e-02, PNorm = 84.1612, GNorm = 0.6204, lr_0 = 1.6078e-04
Loss = 6.9471e-02, PNorm = 84.1632, GNorm = 0.5757, lr_0 = 1.6067e-04
Loss = 7.8241e-02, PNorm = 84.1648, GNorm = 0.6019, lr_0 = 1.6056e-04
Loss = 9.7391e-02, PNorm = 84.1675, GNorm = 0.7434, lr_0 = 1.6045e-04
Loss = 7.8321e-02, PNorm = 84.1686, GNorm = 0.5752, lr_0 = 1.6034e-04
Loss = 8.5341e-02, PNorm = 84.1708, GNorm = 0.6883, lr_0 = 1.6023e-04
Loss = 8.7723e-02, PNorm = 84.1756, GNorm = 0.7334, lr_0 = 1.6012e-04
Loss = 7.0682e-02, PNorm = 84.1796, GNorm = 0.6266, lr_0 = 1.6001e-04
Loss = 8.4968e-02, PNorm = 84.1808, GNorm = 0.5577, lr_0 = 1.5990e-04
Loss = 9.1068e-02, PNorm = 84.1825, GNorm = 0.6424, lr_0 = 1.5979e-04
Loss = 8.5426e-02, PNorm = 84.1842, GNorm = 0.6226, lr_0 = 1.5968e-04
Loss = 8.2519e-02, PNorm = 84.1869, GNorm = 0.5134, lr_0 = 1.5957e-04
Loss = 7.6853e-02, PNorm = 84.1899, GNorm = 0.7522, lr_0 = 1.5946e-04
Loss = 8.3839e-02, PNorm = 84.1944, GNorm = 0.5789, lr_0 = 1.5935e-04
Loss = 9.1537e-02, PNorm = 84.2006, GNorm = 0.6499, lr_0 = 1.5924e-04
Loss = 8.6869e-02, PNorm = 84.2071, GNorm = 0.7709, lr_0 = 1.5913e-04
Loss = 9.3724e-02, PNorm = 84.2093, GNorm = 0.7944, lr_0 = 1.5902e-04
Loss = 8.0614e-02, PNorm = 84.2126, GNorm = 0.6887, lr_0 = 1.5891e-04
Loss = 7.3917e-02, PNorm = 84.2159, GNorm = 0.4527, lr_0 = 1.5880e-04
Loss = 7.0379e-02, PNorm = 84.2173, GNorm = 0.7825, lr_0 = 1.5870e-04
Loss = 8.6209e-02, PNorm = 84.2193, GNorm = 0.5293, lr_0 = 1.5859e-04
Loss = 8.0303e-02, PNorm = 84.2223, GNorm = 0.5667, lr_0 = 1.5848e-04
Loss = 8.2901e-02, PNorm = 84.2239, GNorm = 0.5068, lr_0 = 1.5837e-04
Loss = 7.9042e-02, PNorm = 84.2266, GNorm = 0.5676, lr_0 = 1.5826e-04
Loss = 9.2741e-02, PNorm = 84.2281, GNorm = 0.7460, lr_0 = 1.5815e-04
Loss = 7.9344e-02, PNorm = 84.2290, GNorm = 0.6061, lr_0 = 1.5804e-04
Loss = 8.5800e-02, PNorm = 84.2303, GNorm = 0.4839, lr_0 = 1.5794e-04
Loss = 8.1565e-02, PNorm = 84.2320, GNorm = 0.6559, lr_0 = 1.5783e-04
Loss = 8.6826e-02, PNorm = 84.2333, GNorm = 0.6720, lr_0 = 1.5772e-04
Loss = 8.6276e-02, PNorm = 84.2334, GNorm = 0.6237, lr_0 = 1.5761e-04
Loss = 8.8612e-02, PNorm = 84.2360, GNorm = 0.7618, lr_0 = 1.5750e-04
Loss = 8.7172e-02, PNorm = 84.2401, GNorm = 0.7251, lr_0 = 1.5740e-04
Loss = 7.4434e-02, PNorm = 84.2426, GNorm = 0.6368, lr_0 = 1.5729e-04
Loss = 8.3355e-02, PNorm = 84.2430, GNorm = 0.7700, lr_0 = 1.5718e-04
Loss = 8.3953e-02, PNorm = 84.2448, GNorm = 0.7735, lr_0 = 1.5707e-04
Loss = 8.1966e-02, PNorm = 84.2468, GNorm = 0.7346, lr_0 = 1.5697e-04
Loss = 7.9794e-02, PNorm = 84.2498, GNorm = 0.7622, lr_0 = 1.5686e-04
Loss = 8.4173e-02, PNorm = 84.2515, GNorm = 0.6159, lr_0 = 1.5675e-04
Loss = 7.8060e-02, PNorm = 84.2545, GNorm = 0.6204, lr_0 = 1.5664e-04
Loss = 8.2891e-02, PNorm = 84.2589, GNorm = 0.5396, lr_0 = 1.5654e-04
Loss = 6.7657e-02, PNorm = 84.2630, GNorm = 0.4631, lr_0 = 1.5643e-04
Loss = 7.4577e-02, PNorm = 84.2652, GNorm = 0.5123, lr_0 = 1.5632e-04
Loss = 8.0193e-02, PNorm = 84.2689, GNorm = 0.6869, lr_0 = 1.5621e-04
Loss = 8.8099e-02, PNorm = 84.2713, GNorm = 0.5903, lr_0 = 1.5611e-04
Loss = 8.0963e-02, PNorm = 84.2738, GNorm = 0.4942, lr_0 = 1.5600e-04
Loss = 7.8780e-02, PNorm = 84.2791, GNorm = 0.7225, lr_0 = 1.5589e-04
Loss = 9.0600e-02, PNorm = 84.2817, GNorm = 0.6349, lr_0 = 1.5579e-04
Loss = 8.8936e-02, PNorm = 84.2829, GNorm = 0.8874, lr_0 = 1.5568e-04
Loss = 9.5551e-02, PNorm = 84.2839, GNorm = 0.8633, lr_0 = 1.5557e-04
Loss = 8.9978e-02, PNorm = 84.2864, GNorm = 0.5323, lr_0 = 1.5547e-04
Loss = 7.2320e-02, PNorm = 84.2900, GNorm = 0.5234, lr_0 = 1.5536e-04
Loss = 8.5875e-02, PNorm = 84.2923, GNorm = 0.6377, lr_0 = 1.5525e-04
Loss = 7.6376e-02, PNorm = 84.2957, GNorm = 0.6003, lr_0 = 1.5515e-04
Loss = 7.0690e-02, PNorm = 84.3001, GNorm = 0.7178, lr_0 = 1.5504e-04
Loss = 6.8508e-02, PNorm = 84.3036, GNorm = 0.6159, lr_0 = 1.5493e-04
Loss = 8.9339e-02, PNorm = 84.3069, GNorm = 0.7352, lr_0 = 1.5483e-04
Loss = 7.2319e-02, PNorm = 84.3097, GNorm = 0.7500, lr_0 = 1.5472e-04
Loss = 8.5253e-02, PNorm = 84.3108, GNorm = 0.6901, lr_0 = 1.5462e-04
Loss = 7.4737e-02, PNorm = 84.3116, GNorm = 0.5796, lr_0 = 1.5451e-04
Loss = 8.7525e-02, PNorm = 84.3154, GNorm = 0.5584, lr_0 = 1.5440e-04
Loss = 8.8695e-02, PNorm = 84.3184, GNorm = 0.7931, lr_0 = 1.5430e-04
Loss = 8.5597e-02, PNorm = 84.3223, GNorm = 0.7964, lr_0 = 1.5419e-04
Loss = 8.3477e-02, PNorm = 84.3241, GNorm = 0.7497, lr_0 = 1.5409e-04
Loss = 7.9929e-02, PNorm = 84.3264, GNorm = 0.9929, lr_0 = 1.5398e-04
Loss = 8.2034e-02, PNorm = 84.3300, GNorm = 0.6732, lr_0 = 1.5388e-04
Loss = 8.1511e-02, PNorm = 84.3329, GNorm = 0.5187, lr_0 = 1.5377e-04
Loss = 8.3442e-02, PNorm = 84.3359, GNorm = 0.7039, lr_0 = 1.5367e-04
Loss = 8.0925e-02, PNorm = 84.3359, GNorm = 0.5769, lr_0 = 1.5356e-04
Loss = 7.9547e-02, PNorm = 84.3393, GNorm = 0.7904, lr_0 = 1.5346e-04
Loss = 7.9917e-02, PNorm = 84.3420, GNorm = 0.6903, lr_0 = 1.5335e-04
Loss = 1.0717e-01, PNorm = 84.3446, GNorm = 0.6244, lr_0 = 1.5325e-04
Loss = 8.4689e-02, PNorm = 84.3453, GNorm = 0.8264, lr_0 = 1.5314e-04
Loss = 8.1591e-02, PNorm = 84.3480, GNorm = 0.6308, lr_0 = 1.5304e-04
Loss = 8.4263e-02, PNorm = 84.3500, GNorm = 0.6350, lr_0 = 1.5293e-04
Loss = 8.8361e-02, PNorm = 84.3536, GNorm = 0.7929, lr_0 = 1.5283e-04
Loss = 8.0921e-02, PNorm = 84.3581, GNorm = 0.7408, lr_0 = 1.5272e-04
Loss = 9.3079e-02, PNorm = 84.3609, GNorm = 0.7852, lr_0 = 1.5262e-04
Loss = 9.1652e-02, PNorm = 84.3610, GNorm = 0.7679, lr_0 = 1.5251e-04
Loss = 8.4222e-02, PNorm = 84.3636, GNorm = 0.7073, lr_0 = 1.5241e-04
Loss = 8.5654e-02, PNorm = 84.3651, GNorm = 0.8596, lr_0 = 1.5230e-04
Loss = 8.8861e-02, PNorm = 84.3654, GNorm = 0.8398, lr_0 = 1.5220e-04
Loss = 7.8705e-02, PNorm = 84.3676, GNorm = 0.6222, lr_0 = 1.5209e-04
Loss = 7.9782e-02, PNorm = 84.3697, GNorm = 0.4813, lr_0 = 1.5199e-04
Loss = 6.4325e-02, PNorm = 84.3720, GNorm = 0.6142, lr_0 = 1.5189e-04
Loss = 9.2810e-02, PNorm = 84.3745, GNorm = 0.7211, lr_0 = 1.5178e-04
Loss = 7.6128e-02, PNorm = 84.3768, GNorm = 0.6389, lr_0 = 1.5168e-04
Loss = 8.3248e-02, PNorm = 84.3782, GNorm = 0.5897, lr_0 = 1.5157e-04
Loss = 7.8422e-02, PNorm = 84.3805, GNorm = 0.6499, lr_0 = 1.5147e-04
Loss = 7.7608e-02, PNorm = 84.3835, GNorm = 0.6419, lr_0 = 1.5137e-04
Loss = 8.5696e-02, PNorm = 84.3853, GNorm = 0.5196, lr_0 = 1.5126e-04
Loss = 8.8029e-02, PNorm = 84.3873, GNorm = 0.6700, lr_0 = 1.5116e-04
Loss = 7.9927e-02, PNorm = 84.3904, GNorm = 0.6379, lr_0 = 1.5106e-04
Loss = 9.0430e-02, PNorm = 84.3935, GNorm = 0.6597, lr_0 = 1.5095e-04
Loss = 8.8338e-02, PNorm = 84.3966, GNorm = 0.6438, lr_0 = 1.5085e-04
Validation mae = 0.227356
Epoch 25
Loss = 7.7648e-02, PNorm = 84.4001, GNorm = 0.5779, lr_0 = 1.5075e-04
Loss = 8.4322e-02, PNorm = 84.4012, GNorm = 0.8133, lr_0 = 1.5064e-04
Loss = 7.2749e-02, PNorm = 84.3999, GNorm = 0.6851, lr_0 = 1.5054e-04
Loss = 8.1794e-02, PNorm = 84.3999, GNorm = 1.0269, lr_0 = 1.5044e-04
Loss = 8.4359e-02, PNorm = 84.4014, GNorm = 0.5694, lr_0 = 1.5033e-04
Loss = 8.1392e-02, PNorm = 84.4039, GNorm = 0.7483, lr_0 = 1.5023e-04
Loss = 8.5831e-02, PNorm = 84.4063, GNorm = 0.5807, lr_0 = 1.5013e-04
Loss = 7.7506e-02, PNorm = 84.4075, GNorm = 0.7710, lr_0 = 1.5002e-04
Loss = 8.2937e-02, PNorm = 84.4082, GNorm = 0.5683, lr_0 = 1.4992e-04
Loss = 8.2082e-02, PNorm = 84.4128, GNorm = 0.4173, lr_0 = 1.4982e-04
Loss = 8.1631e-02, PNorm = 84.4154, GNorm = 0.5892, lr_0 = 1.4972e-04
Loss = 7.6853e-02, PNorm = 84.4178, GNorm = 0.6296, lr_0 = 1.4961e-04
Loss = 7.6026e-02, PNorm = 84.4186, GNorm = 0.6703, lr_0 = 1.4951e-04
Loss = 8.0443e-02, PNorm = 84.4222, GNorm = 0.6351, lr_0 = 1.4941e-04
Loss = 8.6220e-02, PNorm = 84.4246, GNorm = 0.6928, lr_0 = 1.4931e-04
Loss = 7.7876e-02, PNorm = 84.4260, GNorm = 0.7249, lr_0 = 1.4920e-04
Loss = 7.6683e-02, PNorm = 84.4279, GNorm = 0.5213, lr_0 = 1.4910e-04
Loss = 7.9173e-02, PNorm = 84.4296, GNorm = 1.0673, lr_0 = 1.4900e-04
Loss = 7.6985e-02, PNorm = 84.4298, GNorm = 0.6437, lr_0 = 1.4890e-04
Loss = 7.6486e-02, PNorm = 84.4315, GNorm = 0.8504, lr_0 = 1.4880e-04
Loss = 8.7783e-02, PNorm = 84.4328, GNorm = 0.6865, lr_0 = 1.4869e-04
Loss = 7.5706e-02, PNorm = 84.4337, GNorm = 0.5276, lr_0 = 1.4859e-04
Loss = 8.8324e-02, PNorm = 84.4355, GNorm = 0.7227, lr_0 = 1.4849e-04
Loss = 8.8112e-02, PNorm = 84.4392, GNorm = 0.5478, lr_0 = 1.4839e-04
Loss = 7.9135e-02, PNorm = 84.4408, GNorm = 0.6682, lr_0 = 1.4829e-04
Loss = 7.6217e-02, PNorm = 84.4430, GNorm = 0.6030, lr_0 = 1.4818e-04
Loss = 8.8748e-02, PNorm = 84.4458, GNorm = 0.6326, lr_0 = 1.4808e-04
Loss = 8.3898e-02, PNorm = 84.4499, GNorm = 0.5830, lr_0 = 1.4798e-04
Loss = 8.1101e-02, PNorm = 84.4517, GNorm = 0.5962, lr_0 = 1.4788e-04
Loss = 8.4181e-02, PNorm = 84.4553, GNorm = 0.5455, lr_0 = 1.4778e-04
Loss = 7.7741e-02, PNorm = 84.4584, GNorm = 0.5869, lr_0 = 1.4768e-04
Loss = 7.2739e-02, PNorm = 84.4614, GNorm = 0.6483, lr_0 = 1.4758e-04
Loss = 7.2274e-02, PNorm = 84.4639, GNorm = 0.6452, lr_0 = 1.4748e-04
Loss = 7.5267e-02, PNorm = 84.4667, GNorm = 0.5151, lr_0 = 1.4737e-04
Loss = 9.1423e-02, PNorm = 84.4701, GNorm = 0.6199, lr_0 = 1.4727e-04
Loss = 8.6449e-02, PNorm = 84.4741, GNorm = 0.6281, lr_0 = 1.4717e-04
Loss = 8.5667e-02, PNorm = 84.4769, GNorm = 0.6638, lr_0 = 1.4707e-04
Loss = 7.2297e-02, PNorm = 84.4794, GNorm = 0.7628, lr_0 = 1.4697e-04
Loss = 7.1933e-02, PNorm = 84.4810, GNorm = 0.6925, lr_0 = 1.4687e-04
Loss = 6.6850e-02, PNorm = 84.4811, GNorm = 0.4471, lr_0 = 1.4677e-04
Loss = 8.5269e-02, PNorm = 84.4820, GNorm = 0.7852, lr_0 = 1.4667e-04
Loss = 8.6107e-02, PNorm = 84.4865, GNorm = 0.6644, lr_0 = 1.4657e-04
Loss = 7.6995e-02, PNorm = 84.4900, GNorm = 0.5023, lr_0 = 1.4647e-04
Loss = 7.1951e-02, PNorm = 84.4937, GNorm = 0.6796, lr_0 = 1.4637e-04
Loss = 7.7819e-02, PNorm = 84.4950, GNorm = 0.5591, lr_0 = 1.4627e-04
Loss = 8.2026e-02, PNorm = 84.4980, GNorm = 0.8556, lr_0 = 1.4617e-04
Loss = 6.9065e-02, PNorm = 84.5021, GNorm = 0.6644, lr_0 = 1.4607e-04
Loss = 7.9382e-02, PNorm = 84.5047, GNorm = 0.4983, lr_0 = 1.4597e-04
Loss = 8.3073e-02, PNorm = 84.5066, GNorm = 0.7608, lr_0 = 1.4587e-04
Loss = 7.3263e-02, PNorm = 84.5099, GNorm = 0.5424, lr_0 = 1.4577e-04
Loss = 7.7512e-02, PNorm = 84.5150, GNorm = 0.6456, lr_0 = 1.4567e-04
Loss = 7.4729e-02, PNorm = 84.5161, GNorm = 0.5722, lr_0 = 1.4557e-04
Loss = 7.8149e-02, PNorm = 84.5168, GNorm = 0.7000, lr_0 = 1.4547e-04
Loss = 8.7495e-02, PNorm = 84.5183, GNorm = 0.6169, lr_0 = 1.4537e-04
Loss = 7.7206e-02, PNorm = 84.5206, GNorm = 0.7128, lr_0 = 1.4527e-04
Loss = 6.7178e-02, PNorm = 84.5233, GNorm = 0.7376, lr_0 = 1.4517e-04
Loss = 8.7734e-02, PNorm = 84.5240, GNorm = 0.6087, lr_0 = 1.4507e-04
Loss = 7.6414e-02, PNorm = 84.5253, GNorm = 0.7115, lr_0 = 1.4497e-04
Loss = 7.4953e-02, PNorm = 84.5285, GNorm = 0.5107, lr_0 = 1.4487e-04
Loss = 7.9418e-02, PNorm = 84.5325, GNorm = 0.6722, lr_0 = 1.4477e-04
Loss = 8.3957e-02, PNorm = 84.5366, GNorm = 0.7319, lr_0 = 1.4467e-04
Loss = 7.6923e-02, PNorm = 84.5384, GNorm = 0.5628, lr_0 = 1.4457e-04
Loss = 9.5386e-02, PNorm = 84.5418, GNorm = 0.6103, lr_0 = 1.4447e-04
Loss = 9.1846e-02, PNorm = 84.5427, GNorm = 0.5917, lr_0 = 1.4438e-04
Loss = 7.9197e-02, PNorm = 84.5432, GNorm = 0.5250, lr_0 = 1.4428e-04
Loss = 8.1423e-02, PNorm = 84.5449, GNorm = 0.7303, lr_0 = 1.4418e-04
Loss = 7.8771e-02, PNorm = 84.5463, GNorm = 0.6740, lr_0 = 1.4408e-04
Loss = 7.9291e-02, PNorm = 84.5471, GNorm = 0.7540, lr_0 = 1.4398e-04
Loss = 7.9615e-02, PNorm = 84.5499, GNorm = 0.6325, lr_0 = 1.4388e-04
Loss = 8.9396e-02, PNorm = 84.5521, GNorm = 0.5008, lr_0 = 1.4378e-04
Loss = 8.2556e-02, PNorm = 84.5558, GNorm = 0.6538, lr_0 = 1.4368e-04
Loss = 9.7594e-02, PNorm = 84.5591, GNorm = 0.8833, lr_0 = 1.4359e-04
Loss = 8.2747e-02, PNorm = 84.5612, GNorm = 0.8156, lr_0 = 1.4349e-04
Loss = 8.4532e-02, PNorm = 84.5620, GNorm = 0.5988, lr_0 = 1.4339e-04
Loss = 7.9461e-02, PNorm = 84.5639, GNorm = 0.8407, lr_0 = 1.4329e-04
Loss = 9.1759e-02, PNorm = 84.5640, GNorm = 0.7857, lr_0 = 1.4319e-04
Loss = 8.0815e-02, PNorm = 84.5671, GNorm = 0.7615, lr_0 = 1.4310e-04
Loss = 7.9531e-02, PNorm = 84.5707, GNorm = 0.6098, lr_0 = 1.4300e-04
Loss = 7.8036e-02, PNorm = 84.5735, GNorm = 0.8859, lr_0 = 1.4290e-04
Loss = 9.1801e-02, PNorm = 84.5772, GNorm = 0.4894, lr_0 = 1.4280e-04
Loss = 9.3659e-02, PNorm = 84.5784, GNorm = 0.6582, lr_0 = 1.4270e-04
Loss = 7.9345e-02, PNorm = 84.5814, GNorm = 0.6803, lr_0 = 1.4261e-04
Loss = 7.7948e-02, PNorm = 84.5840, GNorm = 0.5314, lr_0 = 1.4251e-04
Loss = 8.0923e-02, PNorm = 84.5848, GNorm = 0.5235, lr_0 = 1.4241e-04
Loss = 8.4839e-02, PNorm = 84.5913, GNorm = 0.5323, lr_0 = 1.4231e-04
Loss = 8.1774e-02, PNorm = 84.5951, GNorm = 0.5437, lr_0 = 1.4222e-04
Loss = 8.0269e-02, PNorm = 84.5973, GNorm = 0.4582, lr_0 = 1.4212e-04
Loss = 7.2566e-02, PNorm = 84.6025, GNorm = 0.6266, lr_0 = 1.4202e-04
Loss = 7.3582e-02, PNorm = 84.6053, GNorm = 0.6500, lr_0 = 1.4192e-04
Loss = 8.2599e-02, PNorm = 84.6062, GNorm = 0.6500, lr_0 = 1.4183e-04
Loss = 8.1339e-02, PNorm = 84.6086, GNorm = 0.8046, lr_0 = 1.4173e-04
Loss = 7.4574e-02, PNorm = 84.6128, GNorm = 0.7590, lr_0 = 1.4163e-04
Loss = 8.8405e-02, PNorm = 84.6148, GNorm = 1.0035, lr_0 = 1.4153e-04
Loss = 7.7927e-02, PNorm = 84.6159, GNorm = 0.8836, lr_0 = 1.4144e-04
Loss = 7.2874e-02, PNorm = 84.6173, GNorm = 0.5073, lr_0 = 1.4134e-04
Loss = 7.7692e-02, PNorm = 84.6191, GNorm = 0.6264, lr_0 = 1.4124e-04
Loss = 7.3392e-02, PNorm = 84.6206, GNorm = 0.4712, lr_0 = 1.4115e-04
Loss = 7.3473e-02, PNorm = 84.6232, GNorm = 0.6683, lr_0 = 1.4105e-04
Loss = 8.5935e-02, PNorm = 84.6248, GNorm = 0.5710, lr_0 = 1.4095e-04
Loss = 7.1031e-02, PNorm = 84.6269, GNorm = 0.4361, lr_0 = 1.4086e-04
Loss = 7.4151e-02, PNorm = 84.6294, GNorm = 0.5328, lr_0 = 1.4076e-04
Loss = 8.1011e-02, PNorm = 84.6314, GNorm = 0.5913, lr_0 = 1.4066e-04
Loss = 8.4279e-02, PNorm = 84.6325, GNorm = 0.7135, lr_0 = 1.4057e-04
Loss = 8.5616e-02, PNorm = 84.6341, GNorm = 0.6050, lr_0 = 1.4047e-04
Loss = 7.8189e-02, PNorm = 84.6360, GNorm = 0.6887, lr_0 = 1.4038e-04
Loss = 9.2212e-02, PNorm = 84.6380, GNorm = 0.9965, lr_0 = 1.4028e-04
Loss = 8.0707e-02, PNorm = 84.6397, GNorm = 0.7301, lr_0 = 1.4018e-04
Loss = 7.9713e-02, PNorm = 84.6429, GNorm = 0.6415, lr_0 = 1.4009e-04
Loss = 9.0239e-02, PNorm = 84.6463, GNorm = 0.7072, lr_0 = 1.3999e-04
Loss = 7.4330e-02, PNorm = 84.6487, GNorm = 0.4780, lr_0 = 1.3990e-04
Loss = 7.9076e-02, PNorm = 84.6512, GNorm = 0.6481, lr_0 = 1.3980e-04
Loss = 7.9679e-02, PNorm = 84.6526, GNorm = 0.5506, lr_0 = 1.3970e-04
Loss = 9.4842e-02, PNorm = 84.6524, GNorm = 0.6154, lr_0 = 1.3961e-04
Loss = 7.0997e-02, PNorm = 84.6532, GNorm = 0.4304, lr_0 = 1.3951e-04
Loss = 8.7544e-02, PNorm = 84.6557, GNorm = 0.5541, lr_0 = 1.3942e-04
Loss = 8.1103e-02, PNorm = 84.6564, GNorm = 0.6431, lr_0 = 1.3932e-04
Loss = 7.8847e-02, PNorm = 84.6571, GNorm = 0.5422, lr_0 = 1.3923e-04
Loss = 7.7227e-02, PNorm = 84.6595, GNorm = 0.6480, lr_0 = 1.3913e-04
Loss = 7.6971e-02, PNorm = 84.6607, GNorm = 0.7697, lr_0 = 1.3904e-04
Loss = 7.9424e-02, PNorm = 84.6609, GNorm = 0.7006, lr_0 = 1.3894e-04
Validation mae = 0.226164
Epoch 26
Loss = 7.6061e-02, PNorm = 84.6627, GNorm = 0.6244, lr_0 = 1.3884e-04
Loss = 7.3263e-02, PNorm = 84.6654, GNorm = 0.5331, lr_0 = 1.3875e-04
Loss = 7.8516e-02, PNorm = 84.6677, GNorm = 0.5708, lr_0 = 1.3865e-04
Loss = 8.3568e-02, PNorm = 84.6694, GNorm = 0.4770, lr_0 = 1.3856e-04
Loss = 8.5216e-02, PNorm = 84.6727, GNorm = 0.5808, lr_0 = 1.3846e-04
Loss = 8.7828e-02, PNorm = 84.6733, GNorm = 0.6251, lr_0 = 1.3837e-04
Loss = 7.4009e-02, PNorm = 84.6761, GNorm = 0.6444, lr_0 = 1.3828e-04
Loss = 7.3271e-02, PNorm = 84.6780, GNorm = 0.5536, lr_0 = 1.3818e-04
Loss = 8.1109e-02, PNorm = 84.6806, GNorm = 0.6814, lr_0 = 1.3809e-04
Loss = 8.0610e-02, PNorm = 84.6845, GNorm = 0.7316, lr_0 = 1.3799e-04
Loss = 7.7677e-02, PNorm = 84.6880, GNorm = 0.8774, lr_0 = 1.3790e-04
Loss = 7.6542e-02, PNorm = 84.6913, GNorm = 0.7646, lr_0 = 1.3780e-04
Loss = 7.7957e-02, PNorm = 84.6963, GNorm = 0.6192, lr_0 = 1.3771e-04
Loss = 7.9171e-02, PNorm = 84.6993, GNorm = 0.4961, lr_0 = 1.3761e-04
Loss = 7.5037e-02, PNorm = 84.7028, GNorm = 0.7960, lr_0 = 1.3752e-04
Loss = 7.5861e-02, PNorm = 84.7061, GNorm = 0.6073, lr_0 = 1.3742e-04
Loss = 7.7973e-02, PNorm = 84.7078, GNorm = 0.7294, lr_0 = 1.3733e-04
Loss = 7.8816e-02, PNorm = 84.7083, GNorm = 0.5855, lr_0 = 1.3724e-04
Loss = 7.1513e-02, PNorm = 84.7099, GNorm = 0.6793, lr_0 = 1.3714e-04
Loss = 7.9543e-02, PNorm = 84.7118, GNorm = 0.7374, lr_0 = 1.3705e-04
Loss = 7.3429e-02, PNorm = 84.7139, GNorm = 0.6750, lr_0 = 1.3695e-04
Loss = 7.8486e-02, PNorm = 84.7158, GNorm = 0.7306, lr_0 = 1.3686e-04
Loss = 7.7897e-02, PNorm = 84.7169, GNorm = 0.6148, lr_0 = 1.3677e-04
Loss = 7.3487e-02, PNorm = 84.7209, GNorm = 0.6885, lr_0 = 1.3667e-04
Loss = 8.1077e-02, PNorm = 84.7232, GNorm = 0.5901, lr_0 = 1.3658e-04
Loss = 7.4762e-02, PNorm = 84.7240, GNorm = 0.6374, lr_0 = 1.3649e-04
Loss = 8.2480e-02, PNorm = 84.7256, GNorm = 0.6719, lr_0 = 1.3639e-04
Loss = 7.5960e-02, PNorm = 84.7267, GNorm = 0.7095, lr_0 = 1.3630e-04
Loss = 7.1742e-02, PNorm = 84.7286, GNorm = 0.5700, lr_0 = 1.3621e-04
Loss = 8.2586e-02, PNorm = 84.7306, GNorm = 0.8819, lr_0 = 1.3611e-04
Loss = 6.5940e-02, PNorm = 84.7321, GNorm = 0.5588, lr_0 = 1.3602e-04
Loss = 7.7384e-02, PNorm = 84.7322, GNorm = 0.6574, lr_0 = 1.3593e-04
Loss = 8.8213e-02, PNorm = 84.7342, GNorm = 0.9481, lr_0 = 1.3583e-04
Loss = 7.4887e-02, PNorm = 84.7379, GNorm = 0.5483, lr_0 = 1.3574e-04
Loss = 6.8807e-02, PNorm = 84.7413, GNorm = 0.5534, lr_0 = 1.3565e-04
Loss = 9.7556e-02, PNorm = 84.7425, GNorm = 0.7665, lr_0 = 1.3555e-04
Loss = 7.9929e-02, PNorm = 84.7444, GNorm = 0.4794, lr_0 = 1.3546e-04
Loss = 7.5379e-02, PNorm = 84.7443, GNorm = 0.7513, lr_0 = 1.3537e-04
Loss = 6.9529e-02, PNorm = 84.7467, GNorm = 0.5798, lr_0 = 1.3528e-04
Loss = 7.5934e-02, PNorm = 84.7498, GNorm = 0.4274, lr_0 = 1.3518e-04
Loss = 7.3449e-02, PNorm = 84.7513, GNorm = 0.6469, lr_0 = 1.3509e-04
Loss = 7.8453e-02, PNorm = 84.7505, GNorm = 0.7634, lr_0 = 1.3500e-04
Loss = 7.5591e-02, PNorm = 84.7513, GNorm = 0.5400, lr_0 = 1.3491e-04
Loss = 7.6364e-02, PNorm = 84.7525, GNorm = 0.5221, lr_0 = 1.3481e-04
Loss = 7.4788e-02, PNorm = 84.7558, GNorm = 0.5514, lr_0 = 1.3472e-04
Loss = 7.9846e-02, PNorm = 84.7580, GNorm = 0.7708, lr_0 = 1.3463e-04
Loss = 7.5746e-02, PNorm = 84.7609, GNorm = 0.5871, lr_0 = 1.3454e-04
Loss = 7.0819e-02, PNorm = 84.7634, GNorm = 0.7261, lr_0 = 1.3444e-04
Loss = 7.3301e-02, PNorm = 84.7644, GNorm = 0.5680, lr_0 = 1.3435e-04
Loss = 7.8960e-02, PNorm = 84.7662, GNorm = 0.6930, lr_0 = 1.3426e-04
Loss = 8.2307e-02, PNorm = 84.7684, GNorm = 0.5761, lr_0 = 1.3417e-04
Loss = 7.9427e-02, PNorm = 84.7716, GNorm = 0.7754, lr_0 = 1.3408e-04
Loss = 9.0627e-02, PNorm = 84.7737, GNorm = 0.5680, lr_0 = 1.3398e-04
Loss = 8.5948e-02, PNorm = 84.7761, GNorm = 0.5855, lr_0 = 1.3389e-04
Loss = 7.5440e-02, PNorm = 84.7787, GNorm = 0.5375, lr_0 = 1.3380e-04
Loss = 7.2302e-02, PNorm = 84.7801, GNorm = 0.7864, lr_0 = 1.3371e-04
Loss = 8.8659e-02, PNorm = 84.7828, GNorm = 0.5432, lr_0 = 1.3362e-04
Loss = 7.7341e-02, PNorm = 84.7858, GNorm = 0.7634, lr_0 = 1.3353e-04
Loss = 8.6604e-02, PNorm = 84.7907, GNorm = 0.6938, lr_0 = 1.3343e-04
Loss = 8.0749e-02, PNorm = 84.7923, GNorm = 0.6038, lr_0 = 1.3334e-04
Loss = 7.1116e-02, PNorm = 84.7924, GNorm = 0.5878, lr_0 = 1.3325e-04
Loss = 8.3085e-02, PNorm = 84.7943, GNorm = 0.6272, lr_0 = 1.3316e-04
Loss = 9.2398e-02, PNorm = 84.7960, GNorm = 0.5892, lr_0 = 1.3307e-04
Loss = 8.6959e-02, PNorm = 84.7973, GNorm = 0.8898, lr_0 = 1.3298e-04
Loss = 7.5571e-02, PNorm = 84.7995, GNorm = 0.4851, lr_0 = 1.3289e-04
Loss = 9.1014e-02, PNorm = 84.7998, GNorm = 0.6242, lr_0 = 1.3280e-04
Loss = 7.6776e-02, PNorm = 84.8011, GNorm = 0.6444, lr_0 = 1.3270e-04
Loss = 7.1002e-02, PNorm = 84.8049, GNorm = 0.5464, lr_0 = 1.3261e-04
Loss = 7.5318e-02, PNorm = 84.8089, GNorm = 0.7936, lr_0 = 1.3252e-04
Loss = 8.5996e-02, PNorm = 84.8095, GNorm = 0.6978, lr_0 = 1.3243e-04
Loss = 7.1630e-02, PNorm = 84.8101, GNorm = 0.5495, lr_0 = 1.3234e-04
Loss = 8.5114e-02, PNorm = 84.8139, GNorm = 0.6623, lr_0 = 1.3225e-04
Loss = 8.5359e-02, PNorm = 84.8165, GNorm = 0.7936, lr_0 = 1.3216e-04
Loss = 8.2953e-02, PNorm = 84.8191, GNorm = 0.7352, lr_0 = 1.3207e-04
Loss = 8.3205e-02, PNorm = 84.8202, GNorm = 0.6481, lr_0 = 1.3198e-04
Loss = 7.7645e-02, PNorm = 84.8235, GNorm = 0.5325, lr_0 = 1.3189e-04
Loss = 7.5055e-02, PNorm = 84.8263, GNorm = 0.5712, lr_0 = 1.3180e-04
Loss = 7.6884e-02, PNorm = 84.8285, GNorm = 0.6677, lr_0 = 1.3171e-04
Loss = 8.3745e-02, PNorm = 84.8307, GNorm = 0.5543, lr_0 = 1.3162e-04
Loss = 8.3225e-02, PNorm = 84.8331, GNorm = 0.6524, lr_0 = 1.3153e-04
Loss = 8.7970e-02, PNorm = 84.8356, GNorm = 0.7679, lr_0 = 1.3144e-04
Loss = 8.9620e-02, PNorm = 84.8377, GNorm = 0.7659, lr_0 = 1.3135e-04
Loss = 7.4066e-02, PNorm = 84.8393, GNorm = 0.6046, lr_0 = 1.3126e-04
Loss = 7.4899e-02, PNorm = 84.8415, GNorm = 0.5073, lr_0 = 1.3117e-04
Loss = 8.3345e-02, PNorm = 84.8431, GNorm = 0.8903, lr_0 = 1.3108e-04
Loss = 7.1837e-02, PNorm = 84.8459, GNorm = 0.5229, lr_0 = 1.3099e-04
Loss = 7.0512e-02, PNorm = 84.8483, GNorm = 0.5471, lr_0 = 1.3090e-04
Loss = 7.1304e-02, PNorm = 84.8488, GNorm = 0.5554, lr_0 = 1.3081e-04
Loss = 7.1384e-02, PNorm = 84.8506, GNorm = 0.5825, lr_0 = 1.3072e-04
Loss = 8.2732e-02, PNorm = 84.8522, GNorm = 0.7030, lr_0 = 1.3063e-04
Loss = 7.7828e-02, PNorm = 84.8548, GNorm = 0.6471, lr_0 = 1.3054e-04
Loss = 7.9285e-02, PNorm = 84.8579, GNorm = 0.4975, lr_0 = 1.3045e-04
Loss = 7.7079e-02, PNorm = 84.8579, GNorm = 0.7259, lr_0 = 1.3036e-04
Loss = 8.1444e-02, PNorm = 84.8576, GNorm = 0.5090, lr_0 = 1.3027e-04
Loss = 8.4373e-02, PNorm = 84.8599, GNorm = 0.5718, lr_0 = 1.3018e-04
Loss = 7.6981e-02, PNorm = 84.8627, GNorm = 0.6154, lr_0 = 1.3009e-04
Loss = 7.2988e-02, PNorm = 84.8650, GNorm = 1.0717, lr_0 = 1.3000e-04
Loss = 8.8394e-02, PNorm = 84.8676, GNorm = 0.8227, lr_0 = 1.2992e-04
Loss = 9.1487e-02, PNorm = 84.8679, GNorm = 0.6289, lr_0 = 1.2983e-04
Loss = 7.6503e-02, PNorm = 84.8709, GNorm = 0.5613, lr_0 = 1.2974e-04
Loss = 7.4103e-02, PNorm = 84.8732, GNorm = 0.5421, lr_0 = 1.2965e-04
Loss = 7.1454e-02, PNorm = 84.8749, GNorm = 0.6096, lr_0 = 1.2956e-04
Loss = 8.2279e-02, PNorm = 84.8779, GNorm = 0.6120, lr_0 = 1.2947e-04
Loss = 7.8176e-02, PNorm = 84.8796, GNorm = 0.5902, lr_0 = 1.2938e-04
Loss = 8.8934e-02, PNorm = 84.8814, GNorm = 0.6554, lr_0 = 1.2929e-04
Loss = 7.7789e-02, PNorm = 84.8837, GNorm = 0.5602, lr_0 = 1.2921e-04
Loss = 8.8073e-02, PNorm = 84.8845, GNorm = 0.8433, lr_0 = 1.2912e-04
Loss = 8.6759e-02, PNorm = 84.8843, GNorm = 0.7062, lr_0 = 1.2903e-04
Loss = 8.5694e-02, PNorm = 84.8866, GNorm = 0.5967, lr_0 = 1.2894e-04
Loss = 7.5904e-02, PNorm = 84.8881, GNorm = 0.7408, lr_0 = 1.2885e-04
Loss = 7.9782e-02, PNorm = 84.8896, GNorm = 0.7423, lr_0 = 1.2876e-04
Loss = 7.9266e-02, PNorm = 84.8929, GNorm = 0.6392, lr_0 = 1.2867e-04
Loss = 8.2607e-02, PNorm = 84.8971, GNorm = 0.6948, lr_0 = 1.2859e-04
Loss = 7.8800e-02, PNorm = 84.8997, GNorm = 0.5061, lr_0 = 1.2850e-04
Loss = 8.0096e-02, PNorm = 84.9009, GNorm = 0.9938, lr_0 = 1.2841e-04
Loss = 9.2000e-02, PNorm = 84.9028, GNorm = 0.6613, lr_0 = 1.2832e-04
Loss = 7.5494e-02, PNorm = 84.9039, GNorm = 0.5627, lr_0 = 1.2823e-04
Loss = 7.3580e-02, PNorm = 84.9068, GNorm = 0.6217, lr_0 = 1.2815e-04
Loss = 8.1855e-02, PNorm = 84.9095, GNorm = 0.5478, lr_0 = 1.2806e-04
Loss = 8.7825e-02, PNorm = 84.9100, GNorm = 0.6306, lr_0 = 1.2797e-04
Validation mae = 0.226451
Epoch 27
Loss = 8.0457e-02, PNorm = 84.9103, GNorm = 0.7355, lr_0 = 1.2788e-04
Loss = 8.2403e-02, PNorm = 84.9123, GNorm = 0.7021, lr_0 = 1.2780e-04
Loss = 7.8691e-02, PNorm = 84.9152, GNorm = 0.6337, lr_0 = 1.2771e-04
Loss = 7.8602e-02, PNorm = 84.9173, GNorm = 0.6397, lr_0 = 1.2762e-04
Loss = 6.7680e-02, PNorm = 84.9194, GNorm = 0.6390, lr_0 = 1.2753e-04
Loss = 7.2233e-02, PNorm = 84.9212, GNorm = 0.6803, lr_0 = 1.2745e-04
Loss = 7.7318e-02, PNorm = 84.9242, GNorm = 0.5178, lr_0 = 1.2736e-04
Loss = 7.5091e-02, PNorm = 84.9270, GNorm = 0.6491, lr_0 = 1.2727e-04
Loss = 8.8614e-02, PNorm = 84.9267, GNorm = 0.9719, lr_0 = 1.2718e-04
Loss = 7.5736e-02, PNorm = 84.9306, GNorm = 0.7459, lr_0 = 1.2710e-04
Loss = 7.3422e-02, PNorm = 84.9321, GNorm = 0.7328, lr_0 = 1.2701e-04
Loss = 7.8859e-02, PNorm = 84.9329, GNorm = 0.6625, lr_0 = 1.2692e-04
Loss = 8.4041e-02, PNorm = 84.9350, GNorm = 0.7332, lr_0 = 1.2684e-04
Loss = 7.6156e-02, PNorm = 84.9369, GNorm = 0.5431, lr_0 = 1.2675e-04
Loss = 7.4772e-02, PNorm = 84.9387, GNorm = 0.7012, lr_0 = 1.2666e-04
Loss = 7.6431e-02, PNorm = 84.9382, GNorm = 0.6882, lr_0 = 1.2658e-04
Loss = 8.4003e-02, PNorm = 84.9388, GNorm = 0.7779, lr_0 = 1.2649e-04
Loss = 7.6409e-02, PNorm = 84.9410, GNorm = 0.7227, lr_0 = 1.2640e-04
Loss = 7.3181e-02, PNorm = 84.9441, GNorm = 0.7077, lr_0 = 1.2632e-04
Loss = 7.7270e-02, PNorm = 84.9467, GNorm = 0.7497, lr_0 = 1.2623e-04
Loss = 6.9212e-02, PNorm = 84.9500, GNorm = 0.6295, lr_0 = 1.2614e-04
Loss = 7.9654e-02, PNorm = 84.9513, GNorm = 0.5638, lr_0 = 1.2606e-04
Loss = 7.6346e-02, PNorm = 84.9523, GNorm = 0.5217, lr_0 = 1.2597e-04
Loss = 7.8286e-02, PNorm = 84.9537, GNorm = 0.7181, lr_0 = 1.2588e-04
Loss = 8.6530e-02, PNorm = 84.9563, GNorm = 0.7897, lr_0 = 1.2580e-04
Loss = 6.9054e-02, PNorm = 84.9598, GNorm = 0.7654, lr_0 = 1.2571e-04
Loss = 8.8109e-02, PNorm = 84.9621, GNorm = 0.7270, lr_0 = 1.2563e-04
Loss = 7.6834e-02, PNorm = 84.9635, GNorm = 0.5921, lr_0 = 1.2554e-04
Loss = 8.4134e-02, PNorm = 84.9638, GNorm = 0.8420, lr_0 = 1.2545e-04
Loss = 7.4485e-02, PNorm = 84.9654, GNorm = 0.6172, lr_0 = 1.2537e-04
Loss = 6.7025e-02, PNorm = 84.9680, GNorm = 0.4914, lr_0 = 1.2528e-04
Loss = 7.4165e-02, PNorm = 84.9696, GNorm = 0.7019, lr_0 = 1.2520e-04
Loss = 7.0039e-02, PNorm = 84.9715, GNorm = 0.5276, lr_0 = 1.2511e-04
Loss = 8.0339e-02, PNorm = 84.9727, GNorm = 0.6683, lr_0 = 1.2502e-04
Loss = 7.1688e-02, PNorm = 84.9736, GNorm = 0.6432, lr_0 = 1.2494e-04
Loss = 9.4079e-02, PNorm = 84.9761, GNorm = 0.6622, lr_0 = 1.2485e-04
Loss = 7.1354e-02, PNorm = 84.9786, GNorm = 0.7892, lr_0 = 1.2477e-04
Loss = 8.2746e-02, PNorm = 84.9794, GNorm = 0.8321, lr_0 = 1.2468e-04
Loss = 7.5481e-02, PNorm = 84.9813, GNorm = 0.6659, lr_0 = 1.2460e-04
Loss = 7.0667e-02, PNorm = 84.9834, GNorm = 0.6575, lr_0 = 1.2451e-04
Loss = 7.4835e-02, PNorm = 84.9856, GNorm = 0.5772, lr_0 = 1.2443e-04
Loss = 8.7496e-02, PNorm = 84.9884, GNorm = 0.6886, lr_0 = 1.2434e-04
Loss = 7.1194e-02, PNorm = 84.9895, GNorm = 0.8666, lr_0 = 1.2426e-04
Loss = 7.4818e-02, PNorm = 84.9901, GNorm = 0.6764, lr_0 = 1.2417e-04
Loss = 7.1803e-02, PNorm = 84.9928, GNorm = 0.7404, lr_0 = 1.2409e-04
Loss = 7.6766e-02, PNorm = 84.9963, GNorm = 0.7249, lr_0 = 1.2400e-04
Loss = 8.7585e-02, PNorm = 84.9998, GNorm = 0.6920, lr_0 = 1.2392e-04
Loss = 7.3701e-02, PNorm = 85.0003, GNorm = 0.6048, lr_0 = 1.2383e-04
Loss = 7.8517e-02, PNorm = 85.0016, GNorm = 0.9115, lr_0 = 1.2375e-04
Loss = 7.5916e-02, PNorm = 85.0040, GNorm = 0.5776, lr_0 = 1.2366e-04
Loss = 7.3462e-02, PNorm = 85.0045, GNorm = 0.5668, lr_0 = 1.2358e-04
Loss = 8.3939e-02, PNorm = 85.0066, GNorm = 0.6834, lr_0 = 1.2349e-04
Loss = 8.2731e-02, PNorm = 85.0091, GNorm = 0.5123, lr_0 = 1.2341e-04
Loss = 7.7704e-02, PNorm = 85.0098, GNorm = 1.1078, lr_0 = 1.2332e-04
Loss = 6.9423e-02, PNorm = 85.0108, GNorm = 0.6008, lr_0 = 1.2324e-04
Loss = 7.5383e-02, PNorm = 85.0132, GNorm = 0.6004, lr_0 = 1.2315e-04
Loss = 7.6694e-02, PNorm = 85.0140, GNorm = 0.5844, lr_0 = 1.2307e-04
Loss = 7.1141e-02, PNorm = 85.0166, GNorm = 0.6248, lr_0 = 1.2298e-04
Loss = 7.3938e-02, PNorm = 85.0197, GNorm = 0.4804, lr_0 = 1.2290e-04
Loss = 7.9153e-02, PNorm = 85.0219, GNorm = 0.7953, lr_0 = 1.2282e-04
Loss = 7.1665e-02, PNorm = 85.0237, GNorm = 0.5971, lr_0 = 1.2273e-04
Loss = 6.6357e-02, PNorm = 85.0261, GNorm = 0.4841, lr_0 = 1.2265e-04
Loss = 7.4051e-02, PNorm = 85.0272, GNorm = 0.4790, lr_0 = 1.2256e-04
Loss = 8.3629e-02, PNorm = 85.0282, GNorm = 0.5024, lr_0 = 1.2248e-04
Loss = 6.7017e-02, PNorm = 85.0303, GNorm = 0.6442, lr_0 = 1.2240e-04
Loss = 7.5206e-02, PNorm = 85.0341, GNorm = 0.6075, lr_0 = 1.2231e-04
Loss = 8.7556e-02, PNorm = 85.0349, GNorm = 0.7851, lr_0 = 1.2223e-04
Loss = 7.6515e-02, PNorm = 85.0371, GNorm = 0.6275, lr_0 = 1.2214e-04
Loss = 9.9735e-02, PNorm = 85.0404, GNorm = 0.8567, lr_0 = 1.2206e-04
Loss = 8.5023e-02, PNorm = 85.0427, GNorm = 0.6106, lr_0 = 1.2198e-04
Loss = 8.7628e-02, PNorm = 85.0453, GNorm = 0.5759, lr_0 = 1.2189e-04
Loss = 7.4641e-02, PNorm = 85.0474, GNorm = 0.4613, lr_0 = 1.2181e-04
Loss = 7.2737e-02, PNorm = 85.0478, GNorm = 0.5884, lr_0 = 1.2173e-04
Loss = 7.6370e-02, PNorm = 85.0483, GNorm = 0.6215, lr_0 = 1.2164e-04
Loss = 7.7727e-02, PNorm = 85.0497, GNorm = 0.6763, lr_0 = 1.2156e-04
Loss = 7.2767e-02, PNorm = 85.0511, GNorm = 0.6924, lr_0 = 1.2148e-04
Loss = 7.5592e-02, PNorm = 85.0525, GNorm = 0.6530, lr_0 = 1.2139e-04
Loss = 7.6237e-02, PNorm = 85.0539, GNorm = 0.7595, lr_0 = 1.2131e-04
Loss = 7.2207e-02, PNorm = 85.0564, GNorm = 0.5236, lr_0 = 1.2123e-04
Loss = 7.4153e-02, PNorm = 85.0586, GNorm = 0.6194, lr_0 = 1.2114e-04
Loss = 8.0308e-02, PNorm = 85.0615, GNorm = 0.6039, lr_0 = 1.2106e-04
Loss = 7.8941e-02, PNorm = 85.0641, GNorm = 0.6098, lr_0 = 1.2098e-04
Loss = 7.3053e-02, PNorm = 85.0662, GNorm = 0.5301, lr_0 = 1.2090e-04
Loss = 7.2328e-02, PNorm = 85.0680, GNorm = 0.6717, lr_0 = 1.2081e-04
Loss = 7.2563e-02, PNorm = 85.0710, GNorm = 0.6562, lr_0 = 1.2073e-04
Loss = 7.7295e-02, PNorm = 85.0713, GNorm = 0.6152, lr_0 = 1.2065e-04
Loss = 8.5087e-02, PNorm = 85.0720, GNorm = 0.6188, lr_0 = 1.2056e-04
Loss = 8.1185e-02, PNorm = 85.0745, GNorm = 0.5612, lr_0 = 1.2048e-04
Loss = 7.7221e-02, PNorm = 85.0768, GNorm = 0.6713, lr_0 = 1.2040e-04
Loss = 7.8766e-02, PNorm = 85.0772, GNorm = 0.5642, lr_0 = 1.2032e-04
Loss = 7.9641e-02, PNorm = 85.0788, GNorm = 0.6751, lr_0 = 1.2023e-04
Loss = 7.3435e-02, PNorm = 85.0820, GNorm = 0.6025, lr_0 = 1.2015e-04
Loss = 7.5696e-02, PNorm = 85.0855, GNorm = 0.8015, lr_0 = 1.2007e-04
Loss = 8.9881e-02, PNorm = 85.0873, GNorm = 0.7507, lr_0 = 1.1999e-04
Loss = 7.5123e-02, PNorm = 85.0877, GNorm = 0.7529, lr_0 = 1.1991e-04
Loss = 8.2035e-02, PNorm = 85.0878, GNorm = 0.6689, lr_0 = 1.1982e-04
Loss = 7.7185e-02, PNorm = 85.0886, GNorm = 0.6412, lr_0 = 1.1974e-04
Loss = 7.8517e-02, PNorm = 85.0899, GNorm = 0.8284, lr_0 = 1.1966e-04
Loss = 7.9104e-02, PNorm = 85.0919, GNorm = 0.4957, lr_0 = 1.1958e-04
Loss = 7.4010e-02, PNorm = 85.0926, GNorm = 0.6505, lr_0 = 1.1950e-04
Loss = 8.2542e-02, PNorm = 85.0943, GNorm = 0.4915, lr_0 = 1.1941e-04
Loss = 7.3288e-02, PNorm = 85.0935, GNorm = 0.6275, lr_0 = 1.1933e-04
Loss = 7.8887e-02, PNorm = 85.0944, GNorm = 0.6151, lr_0 = 1.1925e-04
Loss = 7.8770e-02, PNorm = 85.0951, GNorm = 0.6221, lr_0 = 1.1917e-04
Loss = 8.1081e-02, PNorm = 85.0969, GNorm = 0.6580, lr_0 = 1.1909e-04
Loss = 6.7946e-02, PNorm = 85.0972, GNorm = 0.7104, lr_0 = 1.1901e-04
Loss = 7.6546e-02, PNorm = 85.0981, GNorm = 0.7833, lr_0 = 1.1892e-04
Loss = 8.0219e-02, PNorm = 85.0994, GNorm = 0.6622, lr_0 = 1.1884e-04
Loss = 8.0450e-02, PNorm = 85.1013, GNorm = 0.6226, lr_0 = 1.1876e-04
Loss = 7.2080e-02, PNorm = 85.1024, GNorm = 0.5480, lr_0 = 1.1868e-04
Loss = 7.0175e-02, PNorm = 85.1034, GNorm = 0.6204, lr_0 = 1.1860e-04
Loss = 7.4598e-02, PNorm = 85.1045, GNorm = 0.6864, lr_0 = 1.1852e-04
Loss = 8.5225e-02, PNorm = 85.1049, GNorm = 0.6591, lr_0 = 1.1844e-04
Loss = 6.6322e-02, PNorm = 85.1053, GNorm = 0.6088, lr_0 = 1.1835e-04
Loss = 8.1390e-02, PNorm = 85.1073, GNorm = 0.5035, lr_0 = 1.1827e-04
Loss = 8.7686e-02, PNorm = 85.1090, GNorm = 0.5777, lr_0 = 1.1819e-04
Loss = 8.1835e-02, PNorm = 85.1116, GNorm = 0.7628, lr_0 = 1.1811e-04
Loss = 7.3934e-02, PNorm = 85.1145, GNorm = 0.8059, lr_0 = 1.1803e-04
Loss = 7.5800e-02, PNorm = 85.1175, GNorm = 0.6918, lr_0 = 1.1795e-04
Loss = 7.8924e-02, PNorm = 85.1206, GNorm = 0.5011, lr_0 = 1.1787e-04
Validation mae = 0.226767
Epoch 28
Loss = 8.0708e-02, PNorm = 85.1223, GNorm = 0.5755, lr_0 = 1.1779e-04
Loss = 7.3033e-02, PNorm = 85.1247, GNorm = 0.7740, lr_0 = 1.1771e-04
Loss = 6.8906e-02, PNorm = 85.1277, GNorm = 0.5766, lr_0 = 1.1763e-04
Loss = 7.5535e-02, PNorm = 85.1296, GNorm = 0.5394, lr_0 = 1.1755e-04
Loss = 7.4764e-02, PNorm = 85.1315, GNorm = 0.6389, lr_0 = 1.1747e-04
Loss = 6.9438e-02, PNorm = 85.1336, GNorm = 0.6342, lr_0 = 1.1739e-04
Loss = 8.4910e-02, PNorm = 85.1361, GNorm = 0.5747, lr_0 = 1.1730e-04
Loss = 8.0774e-02, PNorm = 85.1374, GNorm = 0.7966, lr_0 = 1.1722e-04
Loss = 6.9972e-02, PNorm = 85.1391, GNorm = 0.6000, lr_0 = 1.1714e-04
Loss = 7.4043e-02, PNorm = 85.1412, GNorm = 0.7367, lr_0 = 1.1706e-04
Loss = 6.5312e-02, PNorm = 85.1443, GNorm = 0.5057, lr_0 = 1.1698e-04
Loss = 7.0954e-02, PNorm = 85.1448, GNorm = 0.7230, lr_0 = 1.1690e-04
Loss = 7.7392e-02, PNorm = 85.1445, GNorm = 0.7410, lr_0 = 1.1682e-04
Loss = 7.3253e-02, PNorm = 85.1454, GNorm = 0.5005, lr_0 = 1.1674e-04
Loss = 6.9165e-02, PNorm = 85.1468, GNorm = 0.7349, lr_0 = 1.1666e-04
Loss = 7.7474e-02, PNorm = 85.1482, GNorm = 0.5455, lr_0 = 1.1658e-04
Loss = 7.3942e-02, PNorm = 85.1502, GNorm = 0.5071, lr_0 = 1.1650e-04
Loss = 8.4912e-02, PNorm = 85.1540, GNorm = 0.7050, lr_0 = 1.1642e-04
Loss = 8.0830e-02, PNorm = 85.1566, GNorm = 0.7274, lr_0 = 1.1634e-04
Loss = 7.6010e-02, PNorm = 85.1575, GNorm = 0.5969, lr_0 = 1.1626e-04
Loss = 7.4821e-02, PNorm = 85.1590, GNorm = 0.6218, lr_0 = 1.1618e-04
Loss = 6.5991e-02, PNorm = 85.1596, GNorm = 0.5047, lr_0 = 1.1611e-04
Loss = 7.7798e-02, PNorm = 85.1612, GNorm = 0.6585, lr_0 = 1.1603e-04
Loss = 7.2173e-02, PNorm = 85.1634, GNorm = 0.6165, lr_0 = 1.1595e-04
Loss = 7.3324e-02, PNorm = 85.1657, GNorm = 0.6267, lr_0 = 1.1587e-04
Loss = 7.4500e-02, PNorm = 85.1684, GNorm = 0.7022, lr_0 = 1.1579e-04
Loss = 8.0051e-02, PNorm = 85.1701, GNorm = 0.5274, lr_0 = 1.1571e-04
Loss = 7.3561e-02, PNorm = 85.1723, GNorm = 0.5656, lr_0 = 1.1563e-04
Loss = 6.6467e-02, PNorm = 85.1729, GNorm = 0.5731, lr_0 = 1.1555e-04
Loss = 6.0267e-02, PNorm = 85.1718, GNorm = 0.5812, lr_0 = 1.1547e-04
Loss = 7.7148e-02, PNorm = 85.1733, GNorm = 0.9454, lr_0 = 1.1539e-04
Loss = 6.9925e-02, PNorm = 85.1742, GNorm = 0.5849, lr_0 = 1.1531e-04
Loss = 7.6224e-02, PNorm = 85.1750, GNorm = 0.5415, lr_0 = 1.1523e-04
Loss = 8.0531e-02, PNorm = 85.1770, GNorm = 0.6373, lr_0 = 1.1515e-04
Loss = 7.5558e-02, PNorm = 85.1787, GNorm = 0.5182, lr_0 = 1.1508e-04
Loss = 7.8645e-02, PNorm = 85.1804, GNorm = 0.5854, lr_0 = 1.1500e-04
Loss = 8.5103e-02, PNorm = 85.1823, GNorm = 0.7276, lr_0 = 1.1492e-04
Loss = 7.7007e-02, PNorm = 85.1847, GNorm = 0.5644, lr_0 = 1.1484e-04
Loss = 7.2699e-02, PNorm = 85.1892, GNorm = 0.7524, lr_0 = 1.1476e-04
Loss = 7.5045e-02, PNorm = 85.1928, GNorm = 0.5795, lr_0 = 1.1468e-04
Loss = 8.9627e-02, PNorm = 85.1926, GNorm = 0.5137, lr_0 = 1.1460e-04
Loss = 7.7468e-02, PNorm = 85.1935, GNorm = 0.5315, lr_0 = 1.1452e-04
Loss = 8.1979e-02, PNorm = 85.1957, GNorm = 0.7625, lr_0 = 1.1445e-04
Loss = 6.9917e-02, PNorm = 85.1967, GNorm = 0.5855, lr_0 = 1.1437e-04
Loss = 8.6017e-02, PNorm = 85.1987, GNorm = 0.7390, lr_0 = 1.1429e-04
Loss = 7.0982e-02, PNorm = 85.2021, GNorm = 0.8263, lr_0 = 1.1421e-04
Loss = 7.7237e-02, PNorm = 85.2024, GNorm = 0.7819, lr_0 = 1.1413e-04
Loss = 8.5886e-02, PNorm = 85.2029, GNorm = 0.6210, lr_0 = 1.1405e-04
Loss = 7.2018e-02, PNorm = 85.2045, GNorm = 0.5999, lr_0 = 1.1398e-04
Loss = 7.8011e-02, PNorm = 85.2072, GNorm = 0.7430, lr_0 = 1.1390e-04
Loss = 6.7353e-02, PNorm = 85.2096, GNorm = 0.5712, lr_0 = 1.1382e-04
Loss = 7.1934e-02, PNorm = 85.2117, GNorm = 0.5346, lr_0 = 1.1374e-04
Loss = 7.7571e-02, PNorm = 85.2128, GNorm = 0.5497, lr_0 = 1.1366e-04
Loss = 7.6422e-02, PNorm = 85.2133, GNorm = 0.7415, lr_0 = 1.1359e-04
Loss = 7.8257e-02, PNorm = 85.2160, GNorm = 0.5691, lr_0 = 1.1351e-04
Loss = 7.6645e-02, PNorm = 85.2179, GNorm = 0.4215, lr_0 = 1.1343e-04
Loss = 7.3078e-02, PNorm = 85.2202, GNorm = 0.6415, lr_0 = 1.1335e-04
Loss = 6.5306e-02, PNorm = 85.2207, GNorm = 0.8656, lr_0 = 1.1328e-04
Loss = 8.8100e-02, PNorm = 85.2234, GNorm = 0.6346, lr_0 = 1.1320e-04
Loss = 8.7003e-02, PNorm = 85.2258, GNorm = 0.6095, lr_0 = 1.1312e-04
Loss = 7.0759e-02, PNorm = 85.2282, GNorm = 0.8557, lr_0 = 1.1304e-04
Loss = 7.2927e-02, PNorm = 85.2299, GNorm = 0.5645, lr_0 = 1.1297e-04
Loss = 7.9710e-02, PNorm = 85.2314, GNorm = 0.6925, lr_0 = 1.1289e-04
Loss = 7.4608e-02, PNorm = 85.2326, GNorm = 0.5758, lr_0 = 1.1281e-04
Loss = 7.1838e-02, PNorm = 85.2325, GNorm = 0.6828, lr_0 = 1.1273e-04
Loss = 7.9187e-02, PNorm = 85.2341, GNorm = 0.4813, lr_0 = 1.1266e-04
Loss = 7.9849e-02, PNorm = 85.2361, GNorm = 0.9361, lr_0 = 1.1258e-04
Loss = 7.5168e-02, PNorm = 85.2375, GNorm = 0.5848, lr_0 = 1.1250e-04
Loss = 7.5462e-02, PNorm = 85.2387, GNorm = 0.6096, lr_0 = 1.1243e-04
Loss = 7.3456e-02, PNorm = 85.2405, GNorm = 0.5416, lr_0 = 1.1235e-04
Loss = 6.8858e-02, PNorm = 85.2416, GNorm = 0.6639, lr_0 = 1.1227e-04
Loss = 7.5193e-02, PNorm = 85.2412, GNorm = 0.6364, lr_0 = 1.1219e-04
Loss = 7.7493e-02, PNorm = 85.2407, GNorm = 0.7236, lr_0 = 1.1212e-04
Loss = 8.1074e-02, PNorm = 85.2431, GNorm = 0.6863, lr_0 = 1.1204e-04
Loss = 6.9243e-02, PNorm = 85.2451, GNorm = 0.8688, lr_0 = 1.1196e-04
Loss = 7.8270e-02, PNorm = 85.2465, GNorm = 0.6693, lr_0 = 1.1189e-04
Loss = 8.7950e-02, PNorm = 85.2480, GNorm = 0.8508, lr_0 = 1.1181e-04
Loss = 7.9123e-02, PNorm = 85.2485, GNorm = 0.7008, lr_0 = 1.1173e-04
Loss = 7.9203e-02, PNorm = 85.2494, GNorm = 0.7394, lr_0 = 1.1166e-04
Loss = 7.9948e-02, PNorm = 85.2521, GNorm = 0.6646, lr_0 = 1.1158e-04
Loss = 7.2259e-02, PNorm = 85.2539, GNorm = 0.5866, lr_0 = 1.1150e-04
Loss = 7.1494e-02, PNorm = 85.2550, GNorm = 0.6617, lr_0 = 1.1143e-04
Loss = 7.6321e-02, PNorm = 85.2560, GNorm = 1.2518, lr_0 = 1.1135e-04
Loss = 8.0480e-02, PNorm = 85.2541, GNorm = 0.7055, lr_0 = 1.1128e-04
Loss = 7.1880e-02, PNorm = 85.2560, GNorm = 0.4996, lr_0 = 1.1120e-04
Loss = 7.5054e-02, PNorm = 85.2567, GNorm = 0.6347, lr_0 = 1.1112e-04
Loss = 6.8485e-02, PNorm = 85.2593, GNorm = 0.5750, lr_0 = 1.1105e-04
Loss = 7.1344e-02, PNorm = 85.2612, GNorm = 0.6921, lr_0 = 1.1097e-04
Loss = 7.8882e-02, PNorm = 85.2621, GNorm = 0.6425, lr_0 = 1.1089e-04
Loss = 8.1162e-02, PNorm = 85.2634, GNorm = 0.5668, lr_0 = 1.1082e-04
Loss = 7.5768e-02, PNorm = 85.2659, GNorm = 0.4614, lr_0 = 1.1074e-04
Loss = 7.3410e-02, PNorm = 85.2690, GNorm = 0.5712, lr_0 = 1.1067e-04
Loss = 7.6793e-02, PNorm = 85.2703, GNorm = 0.6889, lr_0 = 1.1059e-04
Loss = 7.1134e-02, PNorm = 85.2708, GNorm = 0.6309, lr_0 = 1.1052e-04
Loss = 8.2146e-02, PNorm = 85.2724, GNorm = 0.7182, lr_0 = 1.1044e-04
Loss = 7.5562e-02, PNorm = 85.2741, GNorm = 0.8483, lr_0 = 1.1036e-04
Loss = 8.0440e-02, PNorm = 85.2762, GNorm = 0.6574, lr_0 = 1.1029e-04
Loss = 7.4106e-02, PNorm = 85.2777, GNorm = 0.5502, lr_0 = 1.1021e-04
Loss = 7.2502e-02, PNorm = 85.2782, GNorm = 0.6880, lr_0 = 1.1014e-04
Loss = 8.4285e-02, PNorm = 85.2803, GNorm = 0.5391, lr_0 = 1.1006e-04
Loss = 7.4260e-02, PNorm = 85.2826, GNorm = 0.5401, lr_0 = 1.0999e-04
Loss = 7.5347e-02, PNorm = 85.2837, GNorm = 0.5117, lr_0 = 1.0991e-04
Loss = 6.8625e-02, PNorm = 85.2862, GNorm = 0.6913, lr_0 = 1.0984e-04
Loss = 8.3058e-02, PNorm = 85.2888, GNorm = 0.5890, lr_0 = 1.0976e-04
Loss = 7.2831e-02, PNorm = 85.2900, GNorm = 0.7862, lr_0 = 1.0969e-04
Loss = 8.8234e-02, PNorm = 85.2923, GNorm = 0.6530, lr_0 = 1.0961e-04
Loss = 8.4196e-02, PNorm = 85.2941, GNorm = 0.8799, lr_0 = 1.0954e-04
Loss = 8.6642e-02, PNorm = 85.2969, GNorm = 0.7520, lr_0 = 1.0946e-04
Loss = 7.9332e-02, PNorm = 85.2978, GNorm = 0.6325, lr_0 = 1.0939e-04
Loss = 7.1655e-02, PNorm = 85.2975, GNorm = 0.5098, lr_0 = 1.0931e-04
Loss = 6.5984e-02, PNorm = 85.2982, GNorm = 0.8644, lr_0 = 1.0924e-04
Loss = 8.1061e-02, PNorm = 85.3006, GNorm = 0.7376, lr_0 = 1.0916e-04
Loss = 7.4334e-02, PNorm = 85.3024, GNorm = 0.8148, lr_0 = 1.0909e-04
Loss = 8.1435e-02, PNorm = 85.3047, GNorm = 0.7988, lr_0 = 1.0901e-04
Loss = 8.4776e-02, PNorm = 85.3061, GNorm = 0.7082, lr_0 = 1.0894e-04
Loss = 8.2378e-02, PNorm = 85.3085, GNorm = 0.7156, lr_0 = 1.0886e-04
Loss = 7.1287e-02, PNorm = 85.3095, GNorm = 0.7292, lr_0 = 1.0879e-04
Loss = 7.6602e-02, PNorm = 85.3107, GNorm = 0.6276, lr_0 = 1.0871e-04
Loss = 7.7326e-02, PNorm = 85.3113, GNorm = 0.5675, lr_0 = 1.0864e-04
Loss = 7.2110e-02, PNorm = 85.3127, GNorm = 0.6146, lr_0 = 1.0856e-04
Validation mae = 0.228411
Epoch 29
Loss = 7.4114e-02, PNorm = 85.3160, GNorm = 0.9315, lr_0 = 1.0849e-04
Loss = 7.5286e-02, PNorm = 85.3186, GNorm = 0.6997, lr_0 = 1.0841e-04
Loss = 7.1673e-02, PNorm = 85.3207, GNorm = 0.6905, lr_0 = 1.0834e-04
Loss = 7.8900e-02, PNorm = 85.3230, GNorm = 0.6705, lr_0 = 1.0827e-04
Loss = 7.2527e-02, PNorm = 85.3233, GNorm = 0.6493, lr_0 = 1.0819e-04
Loss = 7.6538e-02, PNorm = 85.3246, GNorm = 0.7361, lr_0 = 1.0812e-04
Loss = 7.5285e-02, PNorm = 85.3268, GNorm = 0.6648, lr_0 = 1.0804e-04
Loss = 6.6451e-02, PNorm = 85.3284, GNorm = 0.5367, lr_0 = 1.0797e-04
Loss = 7.5167e-02, PNorm = 85.3301, GNorm = 0.4982, lr_0 = 1.0790e-04
Loss = 6.9891e-02, PNorm = 85.3315, GNorm = 0.8884, lr_0 = 1.0782e-04
Loss = 7.6655e-02, PNorm = 85.3321, GNorm = 0.6837, lr_0 = 1.0775e-04
Loss = 7.3853e-02, PNorm = 85.3338, GNorm = 0.6117, lr_0 = 1.0767e-04
Loss = 7.8656e-02, PNorm = 85.3344, GNorm = 0.5421, lr_0 = 1.0760e-04
Loss = 7.9826e-02, PNorm = 85.3374, GNorm = 0.6190, lr_0 = 1.0753e-04
Loss = 6.5525e-02, PNorm = 85.3412, GNorm = 0.5394, lr_0 = 1.0745e-04
Loss = 7.0749e-02, PNorm = 85.3430, GNorm = 0.5582, lr_0 = 1.0738e-04
Loss = 6.7932e-02, PNorm = 85.3459, GNorm = 0.6213, lr_0 = 1.0731e-04
Loss = 7.7730e-02, PNorm = 85.3463, GNorm = 0.6049, lr_0 = 1.0723e-04
Loss = 7.8371e-02, PNorm = 85.3468, GNorm = 0.7375, lr_0 = 1.0716e-04
Loss = 6.5601e-02, PNorm = 85.3485, GNorm = 0.5930, lr_0 = 1.0709e-04
Loss = 6.8812e-02, PNorm = 85.3506, GNorm = 0.5406, lr_0 = 1.0701e-04
Loss = 7.3825e-02, PNorm = 85.3525, GNorm = 0.5490, lr_0 = 1.0694e-04
Loss = 8.2405e-02, PNorm = 85.3555, GNorm = 0.7312, lr_0 = 1.0687e-04
Loss = 7.4837e-02, PNorm = 85.3572, GNorm = 0.5402, lr_0 = 1.0679e-04
Loss = 6.8639e-02, PNorm = 85.3572, GNorm = 0.4553, lr_0 = 1.0672e-04
Loss = 7.6662e-02, PNorm = 85.3579, GNorm = 0.6505, lr_0 = 1.0665e-04
Loss = 7.7805e-02, PNorm = 85.3602, GNorm = 0.4553, lr_0 = 1.0657e-04
Loss = 7.5277e-02, PNorm = 85.3631, GNorm = 0.5592, lr_0 = 1.0650e-04
Loss = 7.4566e-02, PNorm = 85.3646, GNorm = 0.6832, lr_0 = 1.0643e-04
Loss = 7.0803e-02, PNorm = 85.3655, GNorm = 0.5313, lr_0 = 1.0635e-04
Loss = 8.4648e-02, PNorm = 85.3661, GNorm = 0.8089, lr_0 = 1.0628e-04
Loss = 6.6735e-02, PNorm = 85.3680, GNorm = 0.6321, lr_0 = 1.0621e-04
Loss = 7.7141e-02, PNorm = 85.3695, GNorm = 0.7127, lr_0 = 1.0614e-04
Loss = 7.7915e-02, PNorm = 85.3690, GNorm = 0.6946, lr_0 = 1.0606e-04
Loss = 7.8921e-02, PNorm = 85.3706, GNorm = 0.4898, lr_0 = 1.0599e-04
Loss = 6.1378e-02, PNorm = 85.3728, GNorm = 0.6227, lr_0 = 1.0592e-04
Loss = 6.7476e-02, PNorm = 85.3743, GNorm = 0.6050, lr_0 = 1.0585e-04
Loss = 7.0112e-02, PNorm = 85.3762, GNorm = 0.5979, lr_0 = 1.0577e-04
Loss = 6.0808e-02, PNorm = 85.3776, GNorm = 0.6052, lr_0 = 1.0570e-04
Loss = 7.4849e-02, PNorm = 85.3786, GNorm = 0.6098, lr_0 = 1.0563e-04
Loss = 8.5942e-02, PNorm = 85.3801, GNorm = 0.5996, lr_0 = 1.0556e-04
Loss = 6.7435e-02, PNorm = 85.3817, GNorm = 0.6454, lr_0 = 1.0548e-04
Loss = 7.2184e-02, PNorm = 85.3832, GNorm = 0.6658, lr_0 = 1.0541e-04
Loss = 7.1020e-02, PNorm = 85.3834, GNorm = 0.7008, lr_0 = 1.0534e-04
Loss = 8.1492e-02, PNorm = 85.3842, GNorm = 0.5520, lr_0 = 1.0527e-04
Loss = 7.0596e-02, PNorm = 85.3850, GNorm = 0.6543, lr_0 = 1.0519e-04
Loss = 8.0162e-02, PNorm = 85.3861, GNorm = 0.5227, lr_0 = 1.0512e-04
Loss = 7.7060e-02, PNorm = 85.3870, GNorm = 0.8176, lr_0 = 1.0505e-04
Loss = 7.5951e-02, PNorm = 85.3881, GNorm = 0.7634, lr_0 = 1.0498e-04
Loss = 6.6293e-02, PNorm = 85.3903, GNorm = 0.5973, lr_0 = 1.0491e-04
Loss = 7.9467e-02, PNorm = 85.3915, GNorm = 0.7040, lr_0 = 1.0483e-04
Loss = 7.8905e-02, PNorm = 85.3935, GNorm = 0.6792, lr_0 = 1.0476e-04
Loss = 7.9459e-02, PNorm = 85.3947, GNorm = 0.9719, lr_0 = 1.0469e-04
Loss = 7.6794e-02, PNorm = 85.3970, GNorm = 0.7647, lr_0 = 1.0462e-04
Loss = 6.5795e-02, PNorm = 85.3983, GNorm = 0.4375, lr_0 = 1.0455e-04
Loss = 6.6120e-02, PNorm = 85.3997, GNorm = 0.5572, lr_0 = 1.0448e-04
Loss = 7.9131e-02, PNorm = 85.4006, GNorm = 0.6989, lr_0 = 1.0440e-04
Loss = 6.7185e-02, PNorm = 85.4030, GNorm = 0.5073, lr_0 = 1.0433e-04
Loss = 6.8193e-02, PNorm = 85.4025, GNorm = 0.5685, lr_0 = 1.0426e-04
Loss = 7.6851e-02, PNorm = 85.4034, GNorm = 0.6920, lr_0 = 1.0419e-04
Loss = 7.5149e-02, PNorm = 85.4057, GNorm = 0.6933, lr_0 = 1.0412e-04
Loss = 7.2217e-02, PNorm = 85.4070, GNorm = 0.6435, lr_0 = 1.0405e-04
Loss = 6.4195e-02, PNorm = 85.4073, GNorm = 0.6164, lr_0 = 1.0398e-04
Loss = 6.8089e-02, PNorm = 85.4074, GNorm = 0.5426, lr_0 = 1.0391e-04
Loss = 7.5727e-02, PNorm = 85.4094, GNorm = 0.4518, lr_0 = 1.0383e-04
Loss = 7.1766e-02, PNorm = 85.4135, GNorm = 0.6004, lr_0 = 1.0376e-04
Loss = 7.5315e-02, PNorm = 85.4154, GNorm = 0.8276, lr_0 = 1.0369e-04
Loss = 7.7031e-02, PNorm = 85.4169, GNorm = 0.5475, lr_0 = 1.0362e-04
Loss = 7.9708e-02, PNorm = 85.4185, GNorm = 0.5552, lr_0 = 1.0355e-04
Loss = 7.6025e-02, PNorm = 85.4215, GNorm = 0.5729, lr_0 = 1.0348e-04
Loss = 7.2435e-02, PNorm = 85.4236, GNorm = 0.6998, lr_0 = 1.0341e-04
Loss = 7.6605e-02, PNorm = 85.4258, GNorm = 0.8029, lr_0 = 1.0334e-04
Loss = 7.1854e-02, PNorm = 85.4281, GNorm = 0.5916, lr_0 = 1.0327e-04
Loss = 6.8999e-02, PNorm = 85.4314, GNorm = 0.6217, lr_0 = 1.0320e-04
Loss = 7.8493e-02, PNorm = 85.4335, GNorm = 0.5817, lr_0 = 1.0312e-04
Loss = 7.6943e-02, PNorm = 85.4358, GNorm = 0.8807, lr_0 = 1.0305e-04
Loss = 6.4926e-02, PNorm = 85.4384, GNorm = 0.5286, lr_0 = 1.0298e-04
Loss = 6.1263e-02, PNorm = 85.4411, GNorm = 0.5258, lr_0 = 1.0291e-04
Loss = 8.1179e-02, PNorm = 85.4443, GNorm = 0.7260, lr_0 = 1.0284e-04
Loss = 7.3971e-02, PNorm = 85.4456, GNorm = 0.5416, lr_0 = 1.0277e-04
Loss = 7.1476e-02, PNorm = 85.4458, GNorm = 0.5050, lr_0 = 1.0270e-04
Loss = 7.7938e-02, PNorm = 85.4475, GNorm = 0.7678, lr_0 = 1.0263e-04
Loss = 8.3855e-02, PNorm = 85.4489, GNorm = 0.9627, lr_0 = 1.0256e-04
Loss = 7.3298e-02, PNorm = 85.4488, GNorm = 0.7139, lr_0 = 1.0249e-04
Loss = 7.1290e-02, PNorm = 85.4492, GNorm = 0.6626, lr_0 = 1.0242e-04
Loss = 7.5470e-02, PNorm = 85.4518, GNorm = 0.6592, lr_0 = 1.0235e-04
Loss = 7.5992e-02, PNorm = 85.4540, GNorm = 0.6155, lr_0 = 1.0228e-04
Loss = 7.5925e-02, PNorm = 85.4560, GNorm = 0.6577, lr_0 = 1.0221e-04
Loss = 7.6992e-02, PNorm = 85.4589, GNorm = 0.7037, lr_0 = 1.0214e-04
Loss = 7.5289e-02, PNorm = 85.4596, GNorm = 0.6604, lr_0 = 1.0207e-04
Loss = 8.4933e-02, PNorm = 85.4599, GNorm = 0.5746, lr_0 = 1.0200e-04
Loss = 7.5896e-02, PNorm = 85.4608, GNorm = 0.5914, lr_0 = 1.0193e-04
Loss = 7.9907e-02, PNorm = 85.4623, GNorm = 0.6497, lr_0 = 1.0186e-04
Loss = 8.0396e-02, PNorm = 85.4629, GNorm = 0.6052, lr_0 = 1.0179e-04
Loss = 6.7803e-02, PNorm = 85.4635, GNorm = 0.5607, lr_0 = 1.0172e-04
Loss = 7.2638e-02, PNorm = 85.4639, GNorm = 0.6346, lr_0 = 1.0165e-04
Loss = 7.3236e-02, PNorm = 85.4644, GNorm = 0.5827, lr_0 = 1.0158e-04
Loss = 6.8498e-02, PNorm = 85.4654, GNorm = 0.7407, lr_0 = 1.0151e-04
Loss = 8.2299e-02, PNorm = 85.4675, GNorm = 0.4504, lr_0 = 1.0144e-04
Loss = 8.6395e-02, PNorm = 85.4702, GNorm = 0.6862, lr_0 = 1.0137e-04
Loss = 7.6798e-02, PNorm = 85.4720, GNorm = 0.6684, lr_0 = 1.0130e-04
Loss = 7.6045e-02, PNorm = 85.4737, GNorm = 0.8370, lr_0 = 1.0123e-04
Loss = 7.6919e-02, PNorm = 85.4756, GNorm = 0.5145, lr_0 = 1.0116e-04
Loss = 8.1343e-02, PNorm = 85.4767, GNorm = 0.7410, lr_0 = 1.0110e-04
Loss = 7.3086e-02, PNorm = 85.4776, GNorm = 0.9162, lr_0 = 1.0103e-04
Loss = 7.9664e-02, PNorm = 85.4761, GNorm = 0.7076, lr_0 = 1.0096e-04
Loss = 8.1614e-02, PNorm = 85.4767, GNorm = 0.5641, lr_0 = 1.0089e-04
Loss = 7.7487e-02, PNorm = 85.4778, GNorm = 0.6183, lr_0 = 1.0082e-04
Loss = 7.7625e-02, PNorm = 85.4785, GNorm = 0.7040, lr_0 = 1.0075e-04
Loss = 7.2343e-02, PNorm = 85.4807, GNorm = 0.6897, lr_0 = 1.0068e-04
Loss = 8.3550e-02, PNorm = 85.4822, GNorm = 0.7771, lr_0 = 1.0061e-04
Loss = 6.3481e-02, PNorm = 85.4837, GNorm = 0.4790, lr_0 = 1.0054e-04
Loss = 7.6030e-02, PNorm = 85.4865, GNorm = 0.6229, lr_0 = 1.0047e-04
Loss = 7.6392e-02, PNorm = 85.4891, GNorm = 0.8162, lr_0 = 1.0041e-04
Loss = 8.1945e-02, PNorm = 85.4919, GNorm = 0.7037, lr_0 = 1.0034e-04
Loss = 8.1049e-02, PNorm = 85.4933, GNorm = 0.6792, lr_0 = 1.0027e-04
Loss = 7.5827e-02, PNorm = 85.4949, GNorm = 0.6704, lr_0 = 1.0020e-04
Loss = 8.2742e-02, PNorm = 85.4953, GNorm = 0.7342, lr_0 = 1.0013e-04
Loss = 6.8470e-02, PNorm = 85.4964, GNorm = 0.8822, lr_0 = 1.0006e-04
Loss = 6.9294e-02, PNorm = 85.4982, GNorm = 0.7354, lr_0 = 1.0000e-04
Validation mae = 0.226051
Model 0 best validation mae = 0.225983 on epoch 20
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.225272
Ensemble test mae = 0.225272
Fold 7
Splitting data with seed 7
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 1.0117e+00, PNorm = 47.8612, GNorm = 2.5052, lr_0 = 1.0413e-04
Loss = 1.0078e+00, PNorm = 47.8626, GNorm = 2.9989, lr_0 = 1.0788e-04
Loss = 8.7773e-01, PNorm = 47.8646, GNorm = 5.9072, lr_0 = 1.1163e-04
Loss = 8.7252e-01, PNorm = 47.8671, GNorm = 4.6661, lr_0 = 1.1537e-04
Loss = 9.4361e-01, PNorm = 47.8703, GNorm = 6.4724, lr_0 = 1.1913e-04
Loss = 7.6947e-01, PNorm = 47.8743, GNorm = 2.6884, lr_0 = 1.2287e-04
Loss = 7.9018e-01, PNorm = 47.8806, GNorm = 3.3695, lr_0 = 1.2663e-04
Loss = 7.1631e-01, PNorm = 47.8879, GNorm = 5.3362, lr_0 = 1.3038e-04
Loss = 7.1420e-01, PNorm = 47.8943, GNorm = 8.2860, lr_0 = 1.3413e-04
Loss = 5.5467e-01, PNorm = 47.8997, GNorm = 8.4619, lr_0 = 1.3788e-04
Loss = 5.9199e-01, PNorm = 47.9049, GNorm = 12.0265, lr_0 = 1.4163e-04
Loss = 5.2367e-01, PNorm = 47.9111, GNorm = 12.1014, lr_0 = 1.4537e-04
Loss = 5.6582e-01, PNorm = 47.9158, GNorm = 25.6458, lr_0 = 1.4913e-04
Loss = 6.8231e-01, PNorm = 47.9193, GNorm = 13.4302, lr_0 = 1.5288e-04
Loss = 5.5512e-01, PNorm = 47.9247, GNorm = 12.7229, lr_0 = 1.5662e-04
Loss = 5.0108e-01, PNorm = 47.9302, GNorm = 4.8365, lr_0 = 1.6038e-04
Loss = 4.5834e-01, PNorm = 47.9358, GNorm = 13.3364, lr_0 = 1.6412e-04
Loss = 4.4833e-01, PNorm = 47.9414, GNorm = 10.2974, lr_0 = 1.6788e-04
Loss = 4.5505e-01, PNorm = 47.9459, GNorm = 8.8924, lr_0 = 1.7163e-04
Loss = 5.0138e-01, PNorm = 47.9511, GNorm = 1.8841, lr_0 = 1.7538e-04
Loss = 4.3582e-01, PNorm = 47.9571, GNorm = 9.0334, lr_0 = 1.7913e-04
Loss = 4.5008e-01, PNorm = 47.9623, GNorm = 16.7957, lr_0 = 1.8288e-04
Loss = 4.9612e-01, PNorm = 47.9667, GNorm = 10.9476, lr_0 = 1.8662e-04
Loss = 3.8723e-01, PNorm = 47.9734, GNorm = 2.0681, lr_0 = 1.9038e-04
Loss = 3.7236e-01, PNorm = 47.9785, GNorm = 3.8973, lr_0 = 1.9413e-04
Loss = 3.7919e-01, PNorm = 47.9840, GNorm = 1.7570, lr_0 = 1.9788e-04
Loss = 3.8050e-01, PNorm = 47.9903, GNorm = 16.2769, lr_0 = 2.0163e-04
Loss = 4.0408e-01, PNorm = 47.9944, GNorm = 10.0011, lr_0 = 2.0537e-04
Loss = 4.1794e-01, PNorm = 47.9997, GNorm = 11.6570, lr_0 = 2.0913e-04
Loss = 4.4306e-01, PNorm = 48.0070, GNorm = 10.0644, lr_0 = 2.1288e-04
Loss = 4.4913e-01, PNorm = 48.0152, GNorm = 4.3633, lr_0 = 2.1663e-04
Loss = 4.5955e-01, PNorm = 48.0221, GNorm = 11.8286, lr_0 = 2.2038e-04
Loss = 4.0365e-01, PNorm = 48.0289, GNorm = 6.9684, lr_0 = 2.2412e-04
Loss = 3.8344e-01, PNorm = 48.0347, GNorm = 11.7826, lr_0 = 2.2787e-04
Loss = 3.8333e-01, PNorm = 48.0404, GNorm = 1.4304, lr_0 = 2.3163e-04
Loss = 2.8784e-01, PNorm = 48.0468, GNorm = 12.9624, lr_0 = 2.3538e-04
Loss = 3.3187e-01, PNorm = 48.0513, GNorm = 2.7011, lr_0 = 2.3913e-04
Loss = 3.7156e-01, PNorm = 48.0571, GNorm = 10.8471, lr_0 = 2.4288e-04
Loss = 3.8822e-01, PNorm = 48.0630, GNorm = 10.3406, lr_0 = 2.4662e-04
Loss = 3.3777e-01, PNorm = 48.0702, GNorm = 13.8670, lr_0 = 2.5038e-04
Loss = 3.8830e-01, PNorm = 48.0747, GNorm = 20.1787, lr_0 = 2.5413e-04
Loss = 3.6672e-01, PNorm = 48.0800, GNorm = 2.7324, lr_0 = 2.5788e-04
Loss = 3.5812e-01, PNorm = 48.0871, GNorm = 3.3511, lr_0 = 2.6163e-04
Loss = 3.3070e-01, PNorm = 48.0938, GNorm = 2.5494, lr_0 = 2.6537e-04
Loss = 3.4694e-01, PNorm = 48.0997, GNorm = 3.0676, lr_0 = 2.6912e-04
Loss = 3.5439e-01, PNorm = 48.1053, GNorm = 17.2673, lr_0 = 2.7288e-04
Loss = 3.7259e-01, PNorm = 48.1128, GNorm = 1.5351, lr_0 = 2.7663e-04
Loss = 4.1349e-01, PNorm = 48.1199, GNorm = 6.5733, lr_0 = 2.8038e-04
Loss = 3.2946e-01, PNorm = 48.1317, GNorm = 9.1675, lr_0 = 2.8413e-04
Loss = 3.2492e-01, PNorm = 48.1432, GNorm = 2.8924, lr_0 = 2.8787e-04
Loss = 3.6443e-01, PNorm = 48.1447, GNorm = 2.7203, lr_0 = 2.9163e-04
Loss = 3.2659e-01, PNorm = 48.1518, GNorm = 2.9224, lr_0 = 2.9538e-04
Loss = 3.5230e-01, PNorm = 48.1588, GNorm = 1.0491, lr_0 = 2.9913e-04
Loss = 2.9867e-01, PNorm = 48.1670, GNorm = 6.5360, lr_0 = 3.0288e-04
Loss = 3.1375e-01, PNorm = 48.1759, GNorm = 7.5973, lr_0 = 3.0662e-04
Loss = 2.8678e-01, PNorm = 48.1832, GNorm = 9.2326, lr_0 = 3.1037e-04
Loss = 3.5084e-01, PNorm = 48.1846, GNorm = 15.0921, lr_0 = 3.1413e-04
Loss = 3.2593e-01, PNorm = 48.1881, GNorm = 2.1513, lr_0 = 3.1788e-04
Loss = 3.1225e-01, PNorm = 48.1941, GNorm = 7.5817, lr_0 = 3.2163e-04
Loss = 3.0813e-01, PNorm = 48.2008, GNorm = 17.3798, lr_0 = 3.2538e-04
Loss = 3.5752e-01, PNorm = 48.2063, GNorm = 9.6975, lr_0 = 3.2912e-04
Loss = 3.0845e-01, PNorm = 48.2138, GNorm = 1.3428, lr_0 = 3.3288e-04
Loss = 3.0412e-01, PNorm = 48.2213, GNorm = 1.5344, lr_0 = 3.3663e-04
Loss = 3.4327e-01, PNorm = 48.2288, GNorm = 7.4387, lr_0 = 3.4038e-04
Loss = 3.1337e-01, PNorm = 48.2349, GNorm = 9.6062, lr_0 = 3.4413e-04
Loss = 3.7500e-01, PNorm = 48.2446, GNorm = 3.2975, lr_0 = 3.4787e-04
Loss = 3.2985e-01, PNorm = 48.2539, GNorm = 4.2129, lr_0 = 3.5162e-04
Loss = 3.1442e-01, PNorm = 48.2582, GNorm = 6.3811, lr_0 = 3.5538e-04
Loss = 2.9676e-01, PNorm = 48.2714, GNorm = 4.2852, lr_0 = 3.5913e-04
Loss = 3.1535e-01, PNorm = 48.2804, GNorm = 1.8302, lr_0 = 3.6288e-04
Loss = 3.2417e-01, PNorm = 48.2856, GNorm = 1.3158, lr_0 = 3.6662e-04
Loss = 3.2087e-01, PNorm = 48.2916, GNorm = 5.9511, lr_0 = 3.7037e-04
Loss = 3.9237e-01, PNorm = 48.2995, GNorm = 12.0254, lr_0 = 3.7413e-04
Loss = 3.2371e-01, PNorm = 48.3120, GNorm = 11.4633, lr_0 = 3.7788e-04
Loss = 3.0773e-01, PNorm = 48.3252, GNorm = 1.7728, lr_0 = 3.8163e-04
Loss = 3.7723e-01, PNorm = 48.3346, GNorm = 1.7380, lr_0 = 3.8537e-04
Loss = 3.7056e-01, PNorm = 48.3409, GNorm = 10.5353, lr_0 = 3.8912e-04
Loss = 2.7048e-01, PNorm = 48.3515, GNorm = 4.3918, lr_0 = 3.9287e-04
Loss = 2.8260e-01, PNorm = 48.3594, GNorm = 4.2288, lr_0 = 3.9663e-04
Loss = 3.0790e-01, PNorm = 48.3682, GNorm = 2.1627, lr_0 = 4.0038e-04
Loss = 3.2935e-01, PNorm = 48.3777, GNorm = 3.2007, lr_0 = 4.0413e-04
Loss = 2.6648e-01, PNorm = 48.3870, GNorm = 3.4927, lr_0 = 4.0787e-04
Loss = 3.3002e-01, PNorm = 48.3953, GNorm = 1.4405, lr_0 = 4.1162e-04
Loss = 3.0683e-01, PNorm = 48.4020, GNorm = 4.0942, lr_0 = 4.1537e-04
Loss = 2.7780e-01, PNorm = 48.4090, GNorm = 1.1389, lr_0 = 4.1913e-04
Loss = 2.6621e-01, PNorm = 48.4147, GNorm = 1.3174, lr_0 = 4.2288e-04
Loss = 2.8899e-01, PNorm = 48.4216, GNorm = 1.5332, lr_0 = 4.2662e-04
Loss = 2.6712e-01, PNorm = 48.4311, GNorm = 2.3629, lr_0 = 4.3037e-04
Loss = 2.8421e-01, PNorm = 48.4409, GNorm = 7.2174, lr_0 = 4.3412e-04
Loss = 3.4005e-01, PNorm = 48.4474, GNorm = 6.2864, lr_0 = 4.3788e-04
Loss = 3.0903e-01, PNorm = 48.4584, GNorm = 5.2325, lr_0 = 4.4163e-04
Loss = 3.0937e-01, PNorm = 48.4689, GNorm = 3.7008, lr_0 = 4.4538e-04
Loss = 3.0411e-01, PNorm = 48.4799, GNorm = 1.3205, lr_0 = 4.4912e-04
Loss = 2.8930e-01, PNorm = 48.4911, GNorm = 4.1590, lr_0 = 4.5287e-04
Loss = 2.4940e-01, PNorm = 48.5012, GNorm = 1.2860, lr_0 = 4.5662e-04
Loss = 2.9939e-01, PNorm = 48.5094, GNorm = 1.5475, lr_0 = 4.6038e-04
Loss = 2.8890e-01, PNorm = 48.5197, GNorm = 1.9653, lr_0 = 4.6413e-04
Loss = 2.7214e-01, PNorm = 48.5296, GNorm = 1.4197, lr_0 = 4.6787e-04
Loss = 2.8321e-01, PNorm = 48.5410, GNorm = 1.3927, lr_0 = 4.7162e-04
Loss = 2.8216e-01, PNorm = 48.5496, GNorm = 0.9191, lr_0 = 4.7537e-04
Loss = 3.2523e-01, PNorm = 48.5604, GNorm = 4.9635, lr_0 = 4.7913e-04
Loss = 3.0636e-01, PNorm = 48.5724, GNorm = 1.6807, lr_0 = 4.8288e-04
Loss = 2.7928e-01, PNorm = 48.5917, GNorm = 4.4558, lr_0 = 4.8663e-04
Loss = 3.1220e-01, PNorm = 48.5977, GNorm = 7.6494, lr_0 = 4.9038e-04
Loss = 3.3357e-01, PNorm = 48.6085, GNorm = 2.6870, lr_0 = 4.9412e-04
Loss = 2.6327e-01, PNorm = 48.6172, GNorm = 5.1017, lr_0 = 4.9788e-04
Loss = 2.7741e-01, PNorm = 48.6269, GNorm = 1.5450, lr_0 = 5.0163e-04
Loss = 2.7110e-01, PNorm = 48.6367, GNorm = 1.5440, lr_0 = 5.0538e-04
Loss = 2.9056e-01, PNorm = 48.6474, GNorm = 6.9437, lr_0 = 5.0913e-04
Loss = 3.4224e-01, PNorm = 48.6545, GNorm = 4.5490, lr_0 = 5.1287e-04
Loss = 3.1322e-01, PNorm = 48.6714, GNorm = 1.9767, lr_0 = 5.1663e-04
Loss = 2.6476e-01, PNorm = 48.6862, GNorm = 4.1001, lr_0 = 5.2038e-04
Loss = 3.0013e-01, PNorm = 48.6964, GNorm = 2.6725, lr_0 = 5.2413e-04
Loss = 3.1323e-01, PNorm = 48.7043, GNorm = 3.9386, lr_0 = 5.2788e-04
Loss = 3.4274e-01, PNorm = 48.7191, GNorm = 9.9620, lr_0 = 5.3162e-04
Loss = 2.6318e-01, PNorm = 48.7339, GNorm = 1.7912, lr_0 = 5.3538e-04
Loss = 2.5308e-01, PNorm = 48.7463, GNorm = 5.6011, lr_0 = 5.3912e-04
Loss = 3.5267e-01, PNorm = 48.7524, GNorm = 5.1663, lr_0 = 5.4288e-04
Loss = 2.9985e-01, PNorm = 48.7679, GNorm = 4.0113, lr_0 = 5.4663e-04
Loss = 2.8717e-01, PNorm = 48.7843, GNorm = 1.3376, lr_0 = 5.5038e-04
Validation mae = 0.321688
Epoch 1
Loss = 3.0117e-01, PNorm = 48.8033, GNorm = 5.5119, lr_0 = 5.5413e-04
Loss = 2.6415e-01, PNorm = 48.8175, GNorm = 8.3173, lr_0 = 5.5787e-04
Loss = 2.9067e-01, PNorm = 48.8298, GNorm = 1.0408, lr_0 = 5.6163e-04
Loss = 2.9212e-01, PNorm = 48.8455, GNorm = 0.9487, lr_0 = 5.6538e-04
Loss = 2.7114e-01, PNorm = 48.8548, GNorm = 1.9509, lr_0 = 5.6913e-04
Loss = 2.7778e-01, PNorm = 48.8717, GNorm = 1.2287, lr_0 = 5.7288e-04
Loss = 2.6097e-01, PNorm = 48.8844, GNorm = 2.1502, lr_0 = 5.7662e-04
Loss = 2.9000e-01, PNorm = 48.8982, GNorm = 4.8766, lr_0 = 5.8038e-04
Loss = 2.6209e-01, PNorm = 48.9048, GNorm = 0.7056, lr_0 = 5.8413e-04
Loss = 2.7851e-01, PNorm = 48.9129, GNorm = 3.5352, lr_0 = 5.8788e-04
Loss = 3.0785e-01, PNorm = 48.9243, GNorm = 1.2914, lr_0 = 5.9163e-04
Loss = 2.9356e-01, PNorm = 48.9395, GNorm = 2.2372, lr_0 = 5.9538e-04
Loss = 3.0182e-01, PNorm = 48.9502, GNorm = 4.5179, lr_0 = 5.9913e-04
Loss = 2.6401e-01, PNorm = 48.9632, GNorm = 3.0027, lr_0 = 6.0288e-04
Loss = 3.1192e-01, PNorm = 48.9707, GNorm = 4.9512, lr_0 = 6.0663e-04
Loss = 3.1400e-01, PNorm = 48.9798, GNorm = 5.2814, lr_0 = 6.1038e-04
Loss = 2.5993e-01, PNorm = 48.9979, GNorm = 2.0636, lr_0 = 6.1413e-04
Loss = 2.9941e-01, PNorm = 49.0074, GNorm = 3.9060, lr_0 = 6.1788e-04
Loss = 2.8126e-01, PNorm = 49.0255, GNorm = 1.4302, lr_0 = 6.2163e-04
Loss = 2.8125e-01, PNorm = 49.0413, GNorm = 1.7090, lr_0 = 6.2538e-04
Loss = 2.6696e-01, PNorm = 49.0593, GNorm = 2.2146, lr_0 = 6.2913e-04
Loss = 2.7495e-01, PNorm = 49.0745, GNorm = 1.1714, lr_0 = 6.3288e-04
Loss = 2.5601e-01, PNorm = 49.0876, GNorm = 2.4042, lr_0 = 6.3663e-04
Loss = 2.7430e-01, PNorm = 49.0979, GNorm = 4.5905, lr_0 = 6.4038e-04
Loss = 2.8987e-01, PNorm = 49.1149, GNorm = 4.1315, lr_0 = 6.4413e-04
Loss = 2.8050e-01, PNorm = 49.1326, GNorm = 4.3982, lr_0 = 6.4788e-04
Loss = 2.6936e-01, PNorm = 49.1504, GNorm = 3.6187, lr_0 = 6.5163e-04
Loss = 2.7825e-01, PNorm = 49.1633, GNorm = 1.6742, lr_0 = 6.5538e-04
Loss = 2.3535e-01, PNorm = 49.1835, GNorm = 0.7315, lr_0 = 6.5913e-04
Loss = 2.6715e-01, PNorm = 49.1927, GNorm = 2.4466, lr_0 = 6.6288e-04
Loss = 2.5322e-01, PNorm = 49.1984, GNorm = 2.2113, lr_0 = 6.6663e-04
Loss = 2.8459e-01, PNorm = 49.2126, GNorm = 2.8296, lr_0 = 6.7038e-04
Loss = 2.6794e-01, PNorm = 49.2297, GNorm = 2.2442, lr_0 = 6.7413e-04
Loss = 2.4970e-01, PNorm = 49.2456, GNorm = 2.6996, lr_0 = 6.7788e-04
Loss = 2.7839e-01, PNorm = 49.2586, GNorm = 5.0794, lr_0 = 6.8163e-04
Loss = 2.6383e-01, PNorm = 49.2763, GNorm = 2.1290, lr_0 = 6.8538e-04
Loss = 2.6545e-01, PNorm = 49.2873, GNorm = 0.8666, lr_0 = 6.8913e-04
Loss = 2.4187e-01, PNorm = 49.3025, GNorm = 2.4048, lr_0 = 6.9288e-04
Loss = 2.6244e-01, PNorm = 49.3214, GNorm = 3.0408, lr_0 = 6.9663e-04
Loss = 2.9517e-01, PNorm = 49.3382, GNorm = 5.8090, lr_0 = 7.0038e-04
Loss = 2.9429e-01, PNorm = 49.3622, GNorm = 2.7282, lr_0 = 7.0413e-04
Loss = 3.1785e-01, PNorm = 49.3845, GNorm = 1.2983, lr_0 = 7.0788e-04
Loss = 2.8091e-01, PNorm = 49.3992, GNorm = 2.7093, lr_0 = 7.1163e-04
Loss = 2.6840e-01, PNorm = 49.4176, GNorm = 1.1763, lr_0 = 7.1538e-04
Loss = 2.6692e-01, PNorm = 49.4306, GNorm = 2.5310, lr_0 = 7.1913e-04
Loss = 2.8996e-01, PNorm = 49.4422, GNorm = 1.8612, lr_0 = 7.2288e-04
Loss = 2.4626e-01, PNorm = 49.4570, GNorm = 5.7982, lr_0 = 7.2663e-04
Loss = 3.0045e-01, PNorm = 49.4684, GNorm = 2.0391, lr_0 = 7.3038e-04
Loss = 2.7944e-01, PNorm = 49.4828, GNorm = 7.2381, lr_0 = 7.3413e-04
Loss = 2.6651e-01, PNorm = 49.5004, GNorm = 1.0934, lr_0 = 7.3788e-04
Loss = 2.6197e-01, PNorm = 49.5214, GNorm = 4.6291, lr_0 = 7.4163e-04
Loss = 2.7787e-01, PNorm = 49.5401, GNorm = 1.0217, lr_0 = 7.4538e-04
Loss = 3.2968e-01, PNorm = 49.5530, GNorm = 4.0964, lr_0 = 7.4913e-04
Loss = 3.6393e-01, PNorm = 49.5819, GNorm = 3.2359, lr_0 = 7.5288e-04
Loss = 3.0325e-01, PNorm = 49.6085, GNorm = 2.9898, lr_0 = 7.5663e-04
Loss = 2.6720e-01, PNorm = 49.6336, GNorm = 2.6018, lr_0 = 7.6038e-04
Loss = 2.4912e-01, PNorm = 49.6484, GNorm = 1.5342, lr_0 = 7.6413e-04
Loss = 2.7565e-01, PNorm = 49.6589, GNorm = 2.1470, lr_0 = 7.6788e-04
Loss = 2.4985e-01, PNorm = 49.6719, GNorm = 1.4902, lr_0 = 7.7163e-04
Loss = 2.3929e-01, PNorm = 49.6894, GNorm = 2.8515, lr_0 = 7.7538e-04
Loss = 3.0302e-01, PNorm = 49.7116, GNorm = 2.7224, lr_0 = 7.7913e-04
Loss = 2.7355e-01, PNorm = 49.7355, GNorm = 2.4980, lr_0 = 7.8288e-04
Loss = 2.8106e-01, PNorm = 49.7550, GNorm = 0.8630, lr_0 = 7.8663e-04
Loss = 2.6145e-01, PNorm = 49.7750, GNorm = 2.6709, lr_0 = 7.9038e-04
Loss = 2.3328e-01, PNorm = 49.7955, GNorm = 0.9837, lr_0 = 7.9413e-04
Loss = 2.3680e-01, PNorm = 49.8161, GNorm = 2.5339, lr_0 = 7.9788e-04
Loss = 2.5586e-01, PNorm = 49.8350, GNorm = 3.4280, lr_0 = 8.0163e-04
Loss = 2.2568e-01, PNorm = 49.8474, GNorm = 1.6767, lr_0 = 8.0538e-04
Loss = 2.2878e-01, PNorm = 49.8605, GNorm = 3.2604, lr_0 = 8.0913e-04
Loss = 2.3196e-01, PNorm = 49.8748, GNorm = 2.2502, lr_0 = 8.1288e-04
Loss = 2.4599e-01, PNorm = 49.8968, GNorm = 1.7533, lr_0 = 8.1663e-04
Loss = 2.3615e-01, PNorm = 49.9186, GNorm = 0.8916, lr_0 = 8.2038e-04
Loss = 2.4881e-01, PNorm = 49.9390, GNorm = 1.8611, lr_0 = 8.2413e-04
Loss = 2.7323e-01, PNorm = 49.9621, GNorm = 1.4588, lr_0 = 8.2788e-04
Loss = 2.6532e-01, PNorm = 49.9811, GNorm = 1.7436, lr_0 = 8.3163e-04
Loss = 2.2876e-01, PNorm = 49.9975, GNorm = 1.3754, lr_0 = 8.3538e-04
Loss = 2.4356e-01, PNorm = 50.0194, GNorm = 4.0782, lr_0 = 8.3913e-04
Loss = 2.5190e-01, PNorm = 50.0321, GNorm = 3.4707, lr_0 = 8.4288e-04
Loss = 2.2923e-01, PNorm = 50.0522, GNorm = 1.5509, lr_0 = 8.4663e-04
Loss = 2.4425e-01, PNorm = 50.0781, GNorm = 4.1320, lr_0 = 8.5038e-04
Loss = 2.7025e-01, PNorm = 50.0967, GNorm = 0.6642, lr_0 = 8.5413e-04
Loss = 2.2831e-01, PNorm = 50.1192, GNorm = 1.2260, lr_0 = 8.5788e-04
Loss = 2.3765e-01, PNorm = 50.1450, GNorm = 0.9068, lr_0 = 8.6163e-04
Loss = 2.5386e-01, PNorm = 50.1710, GNorm = 3.4199, lr_0 = 8.6538e-04
Loss = 2.5520e-01, PNorm = 50.1883, GNorm = 6.0633, lr_0 = 8.6913e-04
Loss = 2.3836e-01, PNorm = 50.2067, GNorm = 2.9239, lr_0 = 8.7288e-04
Loss = 2.1607e-01, PNorm = 50.2266, GNorm = 1.6504, lr_0 = 8.7663e-04
Loss = 2.4881e-01, PNorm = 50.2441, GNorm = 1.1567, lr_0 = 8.8038e-04
Loss = 2.6485e-01, PNorm = 50.2650, GNorm = 1.1244, lr_0 = 8.8413e-04
Loss = 2.8615e-01, PNorm = 50.2890, GNorm = 1.3481, lr_0 = 8.8788e-04
Loss = 2.7999e-01, PNorm = 50.3192, GNorm = 2.3715, lr_0 = 8.9163e-04
Loss = 2.4135e-01, PNorm = 50.3431, GNorm = 0.7173, lr_0 = 8.9538e-04
Loss = 2.6862e-01, PNorm = 50.3639, GNorm = 5.0089, lr_0 = 8.9913e-04
Loss = 2.4077e-01, PNorm = 50.3885, GNorm = 0.7950, lr_0 = 9.0288e-04
Loss = 2.5535e-01, PNorm = 50.4094, GNorm = 1.5583, lr_0 = 9.0663e-04
Loss = 2.3400e-01, PNorm = 50.4270, GNorm = 4.0863, lr_0 = 9.1038e-04
Loss = 2.2660e-01, PNorm = 50.4396, GNorm = 2.0525, lr_0 = 9.1413e-04
Loss = 2.5841e-01, PNorm = 50.4580, GNorm = 3.2615, lr_0 = 9.1788e-04
Loss = 3.2887e-01, PNorm = 50.4810, GNorm = 0.9360, lr_0 = 9.2163e-04
Loss = 2.7655e-01, PNorm = 50.5169, GNorm = 3.5191, lr_0 = 9.2538e-04
Loss = 2.9371e-01, PNorm = 50.5546, GNorm = 2.3686, lr_0 = 9.2913e-04
Loss = 2.5125e-01, PNorm = 50.5909, GNorm = 2.2227, lr_0 = 9.3288e-04
Loss = 2.4333e-01, PNorm = 50.6162, GNorm = 1.8251, lr_0 = 9.3663e-04
Loss = 2.4423e-01, PNorm = 50.6377, GNorm = 2.2966, lr_0 = 9.4038e-04
Loss = 2.3474e-01, PNorm = 50.6611, GNorm = 0.8515, lr_0 = 9.4413e-04
Loss = 2.6730e-01, PNorm = 50.6860, GNorm = 2.4457, lr_0 = 9.4788e-04
Loss = 2.7089e-01, PNorm = 50.7020, GNorm = 4.2219, lr_0 = 9.5163e-04
Loss = 2.6305e-01, PNorm = 50.7259, GNorm = 1.1260, lr_0 = 9.5538e-04
Loss = 2.2908e-01, PNorm = 50.7541, GNorm = 3.6902, lr_0 = 9.5913e-04
Loss = 2.4838e-01, PNorm = 50.7753, GNorm = 4.9838, lr_0 = 9.6288e-04
Loss = 2.6585e-01, PNorm = 50.7937, GNorm = 2.6854, lr_0 = 9.6663e-04
Loss = 2.5136e-01, PNorm = 50.8174, GNorm = 2.2321, lr_0 = 9.7038e-04
Loss = 2.2682e-01, PNorm = 50.8439, GNorm = 1.7476, lr_0 = 9.7413e-04
Loss = 2.5759e-01, PNorm = 50.8613, GNorm = 2.6565, lr_0 = 9.7788e-04
Loss = 2.3891e-01, PNorm = 50.8925, GNorm = 1.9329, lr_0 = 9.8163e-04
Loss = 2.9344e-01, PNorm = 50.9133, GNorm = 4.0852, lr_0 = 9.8537e-04
Loss = 3.1278e-01, PNorm = 50.9363, GNorm = 1.5945, lr_0 = 9.8912e-04
Loss = 2.7836e-01, PNorm = 50.9753, GNorm = 1.8549, lr_0 = 9.9288e-04
Loss = 2.4018e-01, PNorm = 51.0065, GNorm = 1.0154, lr_0 = 9.9663e-04
Loss = 2.2638e-01, PNorm = 51.0273, GNorm = 0.8000, lr_0 = 9.9993e-04
Validation mae = 0.287195
Epoch 2
Loss = 2.6220e-01, PNorm = 51.0536, GNorm = 3.5126, lr_0 = 9.9925e-04
Loss = 2.3916e-01, PNorm = 51.0875, GNorm = 1.1853, lr_0 = 9.9856e-04
Loss = 1.9501e-01, PNorm = 51.1065, GNorm = 2.7813, lr_0 = 9.9788e-04
Loss = 2.3018e-01, PNorm = 51.1270, GNorm = 2.0572, lr_0 = 9.9719e-04
Loss = 2.4274e-01, PNorm = 51.1561, GNorm = 0.8652, lr_0 = 9.9651e-04
Loss = 2.5894e-01, PNorm = 51.1869, GNorm = 1.2833, lr_0 = 9.9583e-04
Loss = 2.1921e-01, PNorm = 51.2213, GNorm = 2.9171, lr_0 = 9.9515e-04
Loss = 2.7702e-01, PNorm = 51.2580, GNorm = 6.4164, lr_0 = 9.9446e-04
Loss = 2.6351e-01, PNorm = 51.2868, GNorm = 2.1304, lr_0 = 9.9378e-04
Loss = 2.3796e-01, PNorm = 51.3126, GNorm = 2.5296, lr_0 = 9.9310e-04
Loss = 2.4691e-01, PNorm = 51.3413, GNorm = 1.6945, lr_0 = 9.9242e-04
Loss = 2.3613e-01, PNorm = 51.3530, GNorm = 1.6339, lr_0 = 9.9174e-04
Loss = 2.3312e-01, PNorm = 51.3881, GNorm = 1.8962, lr_0 = 9.9106e-04
Loss = 2.3844e-01, PNorm = 51.4042, GNorm = 2.6808, lr_0 = 9.9038e-04
Loss = 2.1591e-01, PNorm = 51.4306, GNorm = 1.7726, lr_0 = 9.8971e-04
Loss = 2.3677e-01, PNorm = 51.4558, GNorm = 1.5129, lr_0 = 9.8903e-04
Loss = 2.4346e-01, PNorm = 51.4869, GNorm = 1.8066, lr_0 = 9.8835e-04
Loss = 2.2755e-01, PNorm = 51.5120, GNorm = 1.1840, lr_0 = 9.8767e-04
Loss = 2.1578e-01, PNorm = 51.5375, GNorm = 0.8558, lr_0 = 9.8700e-04
Loss = 2.4288e-01, PNorm = 51.5560, GNorm = 2.7124, lr_0 = 9.8632e-04
Loss = 2.1632e-01, PNorm = 51.5839, GNorm = 0.7682, lr_0 = 9.8564e-04
Loss = 2.1918e-01, PNorm = 51.6034, GNorm = 0.7621, lr_0 = 9.8497e-04
Loss = 2.4767e-01, PNorm = 51.6221, GNorm = 1.3362, lr_0 = 9.8429e-04
Loss = 2.2319e-01, PNorm = 51.6420, GNorm = 2.6882, lr_0 = 9.8362e-04
Loss = 2.4168e-01, PNorm = 51.6647, GNorm = 2.6798, lr_0 = 9.8295e-04
Loss = 2.4903e-01, PNorm = 51.6969, GNorm = 1.8257, lr_0 = 9.8227e-04
Loss = 2.3975e-01, PNorm = 51.7331, GNorm = 1.3162, lr_0 = 9.8160e-04
Loss = 2.1791e-01, PNorm = 51.7549, GNorm = 1.3010, lr_0 = 9.8093e-04
Loss = 2.3133e-01, PNorm = 51.7833, GNorm = 0.9324, lr_0 = 9.8026e-04
Loss = 2.3014e-01, PNorm = 51.8005, GNorm = 1.0149, lr_0 = 9.7958e-04
Loss = 2.6125e-01, PNorm = 51.8386, GNorm = 1.7515, lr_0 = 9.7891e-04
Loss = 2.4060e-01, PNorm = 51.8594, GNorm = 1.2113, lr_0 = 9.7824e-04
Loss = 2.5521e-01, PNorm = 51.8844, GNorm = 0.7113, lr_0 = 9.7757e-04
Loss = 2.7747e-01, PNorm = 51.9142, GNorm = 3.3261, lr_0 = 9.7690e-04
Loss = 2.5101e-01, PNorm = 51.9484, GNorm = 3.8136, lr_0 = 9.7623e-04
Loss = 2.0799e-01, PNorm = 51.9725, GNorm = 2.8332, lr_0 = 9.7556e-04
Loss = 2.4460e-01, PNorm = 51.9900, GNorm = 0.9660, lr_0 = 9.7490e-04
Loss = 2.3570e-01, PNorm = 52.0161, GNorm = 1.9760, lr_0 = 9.7423e-04
Loss = 2.4746e-01, PNorm = 52.0456, GNorm = 4.7390, lr_0 = 9.7356e-04
Loss = 2.5311e-01, PNorm = 52.0776, GNorm = 0.7064, lr_0 = 9.7289e-04
Loss = 2.5669e-01, PNorm = 52.1089, GNorm = 2.2746, lr_0 = 9.7223e-04
Loss = 2.3650e-01, PNorm = 52.1414, GNorm = 1.2409, lr_0 = 9.7156e-04
Loss = 2.2712e-01, PNorm = 52.1641, GNorm = 0.8411, lr_0 = 9.7090e-04
Loss = 2.1972e-01, PNorm = 52.1918, GNorm = 1.1024, lr_0 = 9.7023e-04
Loss = 2.7164e-01, PNorm = 52.2146, GNorm = 2.0713, lr_0 = 9.6957e-04
Loss = 2.5771e-01, PNorm = 52.2369, GNorm = 1.2814, lr_0 = 9.6890e-04
Loss = 2.4841e-01, PNorm = 52.2641, GNorm = 1.0766, lr_0 = 9.6824e-04
Loss = 2.1456e-01, PNorm = 52.2879, GNorm = 0.5759, lr_0 = 9.6757e-04
Loss = 2.3857e-01, PNorm = 52.3110, GNorm = 0.8300, lr_0 = 9.6691e-04
Loss = 2.5991e-01, PNorm = 52.3300, GNorm = 1.0349, lr_0 = 9.6625e-04
Loss = 2.3601e-01, PNorm = 52.3605, GNorm = 1.5311, lr_0 = 9.6559e-04
Loss = 2.6698e-01, PNorm = 52.3918, GNorm = 1.8311, lr_0 = 9.6493e-04
Loss = 2.3853e-01, PNorm = 52.4315, GNorm = 2.3646, lr_0 = 9.6427e-04
Loss = 2.3453e-01, PNorm = 52.4689, GNorm = 1.3751, lr_0 = 9.6360e-04
Loss = 2.3570e-01, PNorm = 52.4958, GNorm = 0.9326, lr_0 = 9.6294e-04
Loss = 2.3022e-01, PNorm = 52.5151, GNorm = 2.0911, lr_0 = 9.6228e-04
Loss = 2.4756e-01, PNorm = 52.5296, GNorm = 0.9014, lr_0 = 9.6163e-04
Loss = 2.2856e-01, PNorm = 52.5548, GNorm = 1.7845, lr_0 = 9.6097e-04
Loss = 2.2704e-01, PNorm = 52.5691, GNorm = 1.0324, lr_0 = 9.6031e-04
Loss = 2.4360e-01, PNorm = 52.5987, GNorm = 2.4183, lr_0 = 9.5965e-04
Loss = 2.0749e-01, PNorm = 52.6223, GNorm = 0.8449, lr_0 = 9.5899e-04
Loss = 2.4349e-01, PNorm = 52.6483, GNorm = 0.9545, lr_0 = 9.5834e-04
Loss = 2.4960e-01, PNorm = 52.6678, GNorm = 2.3771, lr_0 = 9.5768e-04
Loss = 2.2265e-01, PNorm = 52.6866, GNorm = 0.8185, lr_0 = 9.5702e-04
Loss = 2.3656e-01, PNorm = 52.7168, GNorm = 0.8674, lr_0 = 9.5637e-04
Loss = 2.1448e-01, PNorm = 52.7461, GNorm = 0.8950, lr_0 = 9.5571e-04
Loss = 2.4408e-01, PNorm = 52.7704, GNorm = 2.0401, lr_0 = 9.5506e-04
Loss = 2.2260e-01, PNorm = 52.7894, GNorm = 1.7240, lr_0 = 9.5440e-04
Loss = 1.9903e-01, PNorm = 52.8140, GNorm = 3.0630, lr_0 = 9.5375e-04
Loss = 2.1456e-01, PNorm = 52.8376, GNorm = 1.8917, lr_0 = 9.5310e-04
Loss = 2.2974e-01, PNorm = 52.8647, GNorm = 1.6375, lr_0 = 9.5244e-04
Loss = 2.0215e-01, PNorm = 52.8877, GNorm = 3.6378, lr_0 = 9.5179e-04
Loss = 2.1082e-01, PNorm = 52.9049, GNorm = 1.0593, lr_0 = 9.5114e-04
Loss = 2.4049e-01, PNorm = 52.9288, GNorm = 0.9686, lr_0 = 9.5049e-04
Loss = 2.1925e-01, PNorm = 52.9473, GNorm = 3.5336, lr_0 = 9.4984e-04
Loss = 2.2758e-01, PNorm = 52.9665, GNorm = 1.0007, lr_0 = 9.4919e-04
Loss = 2.3104e-01, PNorm = 52.9918, GNorm = 1.2592, lr_0 = 9.4854e-04
Loss = 2.1736e-01, PNorm = 53.0100, GNorm = 0.8291, lr_0 = 9.4789e-04
Loss = 2.0511e-01, PNorm = 53.0353, GNorm = 2.4086, lr_0 = 9.4724e-04
Loss = 2.1584e-01, PNorm = 53.0640, GNorm = 0.9341, lr_0 = 9.4659e-04
Loss = 2.1789e-01, PNorm = 53.0844, GNorm = 1.5750, lr_0 = 9.4594e-04
Loss = 2.0218e-01, PNorm = 53.1091, GNorm = 2.3615, lr_0 = 9.4529e-04
Loss = 2.2667e-01, PNorm = 53.1311, GNorm = 0.6120, lr_0 = 9.4464e-04
Loss = 2.0161e-01, PNorm = 53.1546, GNorm = 0.9044, lr_0 = 9.4400e-04
Loss = 2.1186e-01, PNorm = 53.1783, GNorm = 0.8301, lr_0 = 9.4335e-04
Loss = 2.1203e-01, PNorm = 53.2044, GNorm = 1.4510, lr_0 = 9.4270e-04
Loss = 2.4906e-01, PNorm = 53.2306, GNorm = 3.5473, lr_0 = 9.4206e-04
Loss = 2.3558e-01, PNorm = 53.2640, GNorm = 1.1084, lr_0 = 9.4141e-04
Loss = 2.0866e-01, PNorm = 53.2892, GNorm = 0.6388, lr_0 = 9.4077e-04
Loss = 2.6299e-01, PNorm = 53.3142, GNorm = 1.0590, lr_0 = 9.4012e-04
Loss = 2.5636e-01, PNorm = 53.3402, GNorm = 2.2075, lr_0 = 9.3948e-04
Loss = 2.1518e-01, PNorm = 53.3663, GNorm = 1.2191, lr_0 = 9.3884e-04
Loss = 2.6134e-01, PNorm = 53.3991, GNorm = 1.7529, lr_0 = 9.3819e-04
Loss = 2.3605e-01, PNorm = 53.4309, GNorm = 1.1543, lr_0 = 9.3755e-04
Loss = 2.4900e-01, PNorm = 53.4537, GNorm = 1.0475, lr_0 = 9.3691e-04
Loss = 2.1579e-01, PNorm = 53.4752, GNorm = 1.3332, lr_0 = 9.3627e-04
Loss = 2.3924e-01, PNorm = 53.4975, GNorm = 1.0757, lr_0 = 9.3562e-04
Loss = 2.4963e-01, PNorm = 53.5201, GNorm = 2.0743, lr_0 = 9.3498e-04
Loss = 2.4965e-01, PNorm = 53.5510, GNorm = 3.1193, lr_0 = 9.3434e-04
Loss = 2.4420e-01, PNorm = 53.5779, GNorm = 0.9502, lr_0 = 9.3370e-04
Loss = 2.4182e-01, PNorm = 53.6069, GNorm = 0.7646, lr_0 = 9.3306e-04
Loss = 2.3099e-01, PNorm = 53.6206, GNorm = 0.6200, lr_0 = 9.3242e-04
Loss = 1.9917e-01, PNorm = 53.6480, GNorm = 1.7093, lr_0 = 9.3178e-04
Loss = 1.9346e-01, PNorm = 53.6708, GNorm = 1.1886, lr_0 = 9.3115e-04
Loss = 2.0522e-01, PNorm = 53.6898, GNorm = 1.2530, lr_0 = 9.3051e-04
Loss = 2.3737e-01, PNorm = 53.7104, GNorm = 0.8206, lr_0 = 9.2987e-04
Loss = 2.2589e-01, PNorm = 53.7338, GNorm = 0.8600, lr_0 = 9.2923e-04
Loss = 1.9070e-01, PNorm = 53.7604, GNorm = 0.5922, lr_0 = 9.2860e-04
Loss = 2.0637e-01, PNorm = 53.7858, GNorm = 2.2232, lr_0 = 9.2796e-04
Loss = 2.2963e-01, PNorm = 53.8169, GNorm = 0.9767, lr_0 = 9.2733e-04
Loss = 2.2283e-01, PNorm = 53.8379, GNorm = 0.9282, lr_0 = 9.2669e-04
Loss = 1.9670e-01, PNorm = 53.8632, GNorm = 1.2637, lr_0 = 9.2606e-04
Loss = 2.1600e-01, PNorm = 53.8890, GNorm = 1.3952, lr_0 = 9.2542e-04
Loss = 2.1318e-01, PNorm = 53.9169, GNorm = 1.8244, lr_0 = 9.2479e-04
Loss = 2.0070e-01, PNorm = 53.9418, GNorm = 2.0360, lr_0 = 9.2415e-04
Loss = 2.0452e-01, PNorm = 53.9558, GNorm = 0.7264, lr_0 = 9.2352e-04
Loss = 2.0825e-01, PNorm = 53.9793, GNorm = 0.8497, lr_0 = 9.2289e-04
Loss = 2.0083e-01, PNorm = 53.9943, GNorm = 1.2378, lr_0 = 9.2226e-04
Loss = 2.0173e-01, PNorm = 54.0133, GNorm = 0.7583, lr_0 = 9.2162e-04
Loss = 1.9556e-01, PNorm = 54.0370, GNorm = 0.9820, lr_0 = 9.2099e-04
Validation mae = 0.286469
Epoch 3
Loss = 2.1491e-01, PNorm = 54.0637, GNorm = 2.4234, lr_0 = 9.2036e-04
Loss = 2.4525e-01, PNorm = 54.1074, GNorm = 2.1063, lr_0 = 9.1973e-04
Loss = 2.0970e-01, PNorm = 54.1363, GNorm = 1.4854, lr_0 = 9.1910e-04
Loss = 2.3176e-01, PNorm = 54.1609, GNorm = 2.8057, lr_0 = 9.1847e-04
Loss = 2.0653e-01, PNorm = 54.1902, GNorm = 2.0007, lr_0 = 9.1784e-04
Loss = 2.0621e-01, PNorm = 54.2208, GNorm = 2.8792, lr_0 = 9.1721e-04
Loss = 2.0575e-01, PNorm = 54.2456, GNorm = 1.8243, lr_0 = 9.1658e-04
Loss = 2.0276e-01, PNorm = 54.2742, GNorm = 0.7385, lr_0 = 9.1596e-04
Loss = 1.9473e-01, PNorm = 54.2866, GNorm = 1.7319, lr_0 = 9.1533e-04
Loss = 2.0631e-01, PNorm = 54.3037, GNorm = 1.6350, lr_0 = 9.1470e-04
Loss = 2.0737e-01, PNorm = 54.3295, GNorm = 0.5841, lr_0 = 9.1408e-04
Loss = 1.8955e-01, PNorm = 54.3465, GNorm = 0.6178, lr_0 = 9.1345e-04
Loss = 1.9869e-01, PNorm = 54.3816, GNorm = 1.0024, lr_0 = 9.1282e-04
Loss = 2.0773e-01, PNorm = 54.4042, GNorm = 1.4608, lr_0 = 9.1220e-04
Loss = 1.8642e-01, PNorm = 54.4336, GNorm = 0.8436, lr_0 = 9.1157e-04
Loss = 1.8884e-01, PNorm = 54.4572, GNorm = 2.0049, lr_0 = 9.1095e-04
Loss = 1.8806e-01, PNorm = 54.4837, GNorm = 2.0093, lr_0 = 9.1032e-04
Loss = 1.9014e-01, PNorm = 54.5067, GNorm = 0.8062, lr_0 = 9.0970e-04
Loss = 2.1372e-01, PNorm = 54.5253, GNorm = 1.2907, lr_0 = 9.0908e-04
Loss = 2.1748e-01, PNorm = 54.5469, GNorm = 1.1336, lr_0 = 9.0846e-04
Loss = 2.0043e-01, PNorm = 54.5740, GNorm = 0.9118, lr_0 = 9.0783e-04
Loss = 2.0538e-01, PNorm = 54.6077, GNorm = 1.5156, lr_0 = 9.0721e-04
Loss = 1.8612e-01, PNorm = 54.6326, GNorm = 1.6054, lr_0 = 9.0659e-04
Loss = 2.0812e-01, PNorm = 54.6557, GNorm = 2.0552, lr_0 = 9.0597e-04
Loss = 2.0142e-01, PNorm = 54.6808, GNorm = 0.9367, lr_0 = 9.0535e-04
Loss = 2.4309e-01, PNorm = 54.7041, GNorm = 1.3452, lr_0 = 9.0473e-04
Loss = 2.1955e-01, PNorm = 54.7286, GNorm = 2.1066, lr_0 = 9.0411e-04
Loss = 1.8125e-01, PNorm = 54.7634, GNorm = 0.8539, lr_0 = 9.0349e-04
Loss = 2.0105e-01, PNorm = 54.7839, GNorm = 0.7028, lr_0 = 9.0287e-04
Loss = 2.0830e-01, PNorm = 54.8094, GNorm = 1.9423, lr_0 = 9.0225e-04
Loss = 2.2771e-01, PNorm = 54.8415, GNorm = 1.4789, lr_0 = 9.0163e-04
Loss = 2.2503e-01, PNorm = 54.8694, GNorm = 1.1835, lr_0 = 9.0102e-04
Loss = 1.9979e-01, PNorm = 54.8926, GNorm = 1.1609, lr_0 = 9.0040e-04
Loss = 1.7874e-01, PNorm = 54.9187, GNorm = 1.1158, lr_0 = 8.9978e-04
Loss = 1.9982e-01, PNorm = 54.9418, GNorm = 1.0057, lr_0 = 8.9916e-04
Loss = 1.9754e-01, PNorm = 54.9683, GNorm = 1.2998, lr_0 = 8.9855e-04
Loss = 2.1292e-01, PNorm = 54.9988, GNorm = 1.1611, lr_0 = 8.9793e-04
Loss = 1.9233e-01, PNorm = 55.0280, GNorm = 0.7042, lr_0 = 8.9732e-04
Loss = 2.4862e-01, PNorm = 55.0565, GNorm = 2.5614, lr_0 = 8.9670e-04
Loss = 2.1974e-01, PNorm = 55.0952, GNorm = 0.9837, lr_0 = 8.9609e-04
Loss = 1.8677e-01, PNorm = 55.1208, GNorm = 2.1324, lr_0 = 8.9548e-04
Loss = 2.1357e-01, PNorm = 55.1482, GNorm = 1.6968, lr_0 = 8.9486e-04
Loss = 2.4758e-01, PNorm = 55.1648, GNorm = 2.1212, lr_0 = 8.9425e-04
Loss = 1.9894e-01, PNorm = 55.1870, GNorm = 1.3622, lr_0 = 8.9364e-04
Loss = 1.9294e-01, PNorm = 55.2106, GNorm = 2.3401, lr_0 = 8.9302e-04
Loss = 2.0861e-01, PNorm = 55.2432, GNorm = 1.8782, lr_0 = 8.9241e-04
Loss = 2.1446e-01, PNorm = 55.2712, GNorm = 0.9851, lr_0 = 8.9180e-04
Loss = 2.0186e-01, PNorm = 55.2967, GNorm = 0.7905, lr_0 = 8.9119e-04
Loss = 1.9200e-01, PNorm = 55.3144, GNorm = 0.9981, lr_0 = 8.9058e-04
Loss = 2.0469e-01, PNorm = 55.3330, GNorm = 0.6665, lr_0 = 8.8997e-04
Loss = 2.0423e-01, PNorm = 55.3480, GNorm = 1.5362, lr_0 = 8.8936e-04
Loss = 1.8661e-01, PNorm = 55.3751, GNorm = 1.1605, lr_0 = 8.8875e-04
Loss = 2.1119e-01, PNorm = 55.3963, GNorm = 1.5062, lr_0 = 8.8814e-04
Loss = 2.0604e-01, PNorm = 55.4153, GNorm = 0.9585, lr_0 = 8.8753e-04
Loss = 1.9367e-01, PNorm = 55.4440, GNorm = 2.0252, lr_0 = 8.8693e-04
Loss = 2.0747e-01, PNorm = 55.4727, GNorm = 0.5202, lr_0 = 8.8632e-04
Loss = 1.8407e-01, PNorm = 55.4974, GNorm = 0.5340, lr_0 = 8.8571e-04
Loss = 1.9725e-01, PNorm = 55.5243, GNorm = 0.8256, lr_0 = 8.8510e-04
Loss = 2.1365e-01, PNorm = 55.5545, GNorm = 1.1075, lr_0 = 8.8450e-04
Loss = 2.7567e-01, PNorm = 55.5973, GNorm = 2.2591, lr_0 = 8.8389e-04
Loss = 2.3809e-01, PNorm = 55.6344, GNorm = 1.9956, lr_0 = 8.8329e-04
Loss = 2.0466e-01, PNorm = 55.6605, GNorm = 1.1085, lr_0 = 8.8268e-04
Loss = 2.2158e-01, PNorm = 55.6900, GNorm = 0.9877, lr_0 = 8.8208e-04
Loss = 2.2376e-01, PNorm = 55.7173, GNorm = 0.7815, lr_0 = 8.8147e-04
Loss = 2.3001e-01, PNorm = 55.7436, GNorm = 1.0798, lr_0 = 8.8087e-04
Loss = 1.9726e-01, PNorm = 55.7800, GNorm = 0.9243, lr_0 = 8.8026e-04
Loss = 1.8464e-01, PNorm = 55.8098, GNorm = 1.1472, lr_0 = 8.7966e-04
Loss = 2.2486e-01, PNorm = 55.8247, GNorm = 1.2213, lr_0 = 8.7906e-04
Loss = 2.0777e-01, PNorm = 55.8620, GNorm = 1.4906, lr_0 = 8.7846e-04
Loss = 2.0832e-01, PNorm = 55.8974, GNorm = 0.6839, lr_0 = 8.7785e-04
Loss = 2.1697e-01, PNorm = 55.9117, GNorm = 1.7459, lr_0 = 8.7725e-04
Loss = 2.2167e-01, PNorm = 55.9346, GNorm = 1.4215, lr_0 = 8.7665e-04
Loss = 2.1886e-01, PNorm = 55.9645, GNorm = 0.7249, lr_0 = 8.7605e-04
Loss = 2.0106e-01, PNorm = 56.0034, GNorm = 0.6213, lr_0 = 8.7545e-04
Loss = 1.8918e-01, PNorm = 56.0236, GNorm = 1.0831, lr_0 = 8.7485e-04
Loss = 2.2242e-01, PNorm = 56.0508, GNorm = 2.3357, lr_0 = 8.7425e-04
Loss = 1.9331e-01, PNorm = 56.0720, GNorm = 1.1873, lr_0 = 8.7365e-04
Loss = 2.2364e-01, PNorm = 56.1099, GNorm = 2.8807, lr_0 = 8.7306e-04
Loss = 2.1250e-01, PNorm = 56.1359, GNorm = 0.7558, lr_0 = 8.7246e-04
Loss = 1.8407e-01, PNorm = 56.1680, GNorm = 0.8746, lr_0 = 8.7186e-04
Loss = 2.3379e-01, PNorm = 56.1867, GNorm = 2.8692, lr_0 = 8.7126e-04
Loss = 1.8967e-01, PNorm = 56.2159, GNorm = 1.0625, lr_0 = 8.7067e-04
Loss = 1.8786e-01, PNorm = 56.2415, GNorm = 1.9134, lr_0 = 8.7007e-04
Loss = 2.0100e-01, PNorm = 56.2682, GNorm = 1.2943, lr_0 = 8.6947e-04
Loss = 1.9953e-01, PNorm = 56.2932, GNorm = 1.2795, lr_0 = 8.6888e-04
Loss = 1.9841e-01, PNorm = 56.3209, GNorm = 0.9230, lr_0 = 8.6828e-04
Loss = 1.7634e-01, PNorm = 56.3389, GNorm = 1.4127, lr_0 = 8.6769e-04
Loss = 1.9947e-01, PNorm = 56.3643, GNorm = 1.5886, lr_0 = 8.6709e-04
Loss = 1.8835e-01, PNorm = 56.3858, GNorm = 0.7330, lr_0 = 8.6650e-04
Loss = 1.9029e-01, PNorm = 56.4050, GNorm = 1.5998, lr_0 = 8.6590e-04
Loss = 1.8818e-01, PNorm = 56.4263, GNorm = 1.2539, lr_0 = 8.6531e-04
Loss = 1.9382e-01, PNorm = 56.4507, GNorm = 1.5199, lr_0 = 8.6472e-04
Loss = 2.1757e-01, PNorm = 56.4709, GNorm = 1.3276, lr_0 = 8.6413e-04
Loss = 1.9899e-01, PNorm = 56.4825, GNorm = 0.7329, lr_0 = 8.6353e-04
Loss = 2.0918e-01, PNorm = 56.5088, GNorm = 1.3200, lr_0 = 8.6294e-04
Loss = 1.9678e-01, PNorm = 56.5258, GNorm = 0.7214, lr_0 = 8.6235e-04
Loss = 2.1502e-01, PNorm = 56.5415, GNorm = 3.2871, lr_0 = 8.6176e-04
Loss = 1.9063e-01, PNorm = 56.5637, GNorm = 0.7824, lr_0 = 8.6117e-04
Loss = 1.8297e-01, PNorm = 56.5853, GNorm = 1.4111, lr_0 = 8.6058e-04
Loss = 1.8371e-01, PNorm = 56.6078, GNorm = 0.7387, lr_0 = 8.5999e-04
Loss = 2.0638e-01, PNorm = 56.6357, GNorm = 0.7287, lr_0 = 8.5940e-04
Loss = 2.0899e-01, PNorm = 56.6656, GNorm = 1.3840, lr_0 = 8.5881e-04
Loss = 2.1642e-01, PNorm = 56.6999, GNorm = 1.0171, lr_0 = 8.5823e-04
Loss = 2.1065e-01, PNorm = 56.7174, GNorm = 1.3086, lr_0 = 8.5764e-04
Loss = 2.0507e-01, PNorm = 56.7447, GNorm = 0.8482, lr_0 = 8.5705e-04
Loss = 2.1223e-01, PNorm = 56.7726, GNorm = 0.8110, lr_0 = 8.5646e-04
Loss = 1.8302e-01, PNorm = 56.7993, GNorm = 0.8882, lr_0 = 8.5588e-04
Loss = 1.9136e-01, PNorm = 56.8293, GNorm = 1.2446, lr_0 = 8.5529e-04
Loss = 2.1402e-01, PNorm = 56.8621, GNorm = 1.2609, lr_0 = 8.5470e-04
Loss = 1.9235e-01, PNorm = 56.8858, GNorm = 1.0574, lr_0 = 8.5412e-04
Loss = 1.7761e-01, PNorm = 56.9117, GNorm = 1.8130, lr_0 = 8.5353e-04
Loss = 1.9555e-01, PNorm = 56.9255, GNorm = 0.9699, lr_0 = 8.5295e-04
Loss = 1.8993e-01, PNorm = 56.9499, GNorm = 1.0480, lr_0 = 8.5236e-04
Loss = 2.0170e-01, PNorm = 56.9652, GNorm = 1.1850, lr_0 = 8.5178e-04
Loss = 1.8841e-01, PNorm = 56.9779, GNorm = 0.9853, lr_0 = 8.5120e-04
Loss = 1.7120e-01, PNorm = 57.0054, GNorm = 1.2830, lr_0 = 8.5061e-04
Loss = 2.1486e-01, PNorm = 57.0264, GNorm = 1.5265, lr_0 = 8.5003e-04
Loss = 1.8302e-01, PNorm = 57.0535, GNorm = 1.5053, lr_0 = 8.4945e-04
Loss = 1.9423e-01, PNorm = 57.0727, GNorm = 1.8502, lr_0 = 8.4887e-04
Loss = 1.7364e-01, PNorm = 57.1006, GNorm = 1.1263, lr_0 = 8.4828e-04
Validation mae = 0.271017
Epoch 4
Loss = 1.8293e-01, PNorm = 57.1243, GNorm = 1.2746, lr_0 = 8.4770e-04
Loss = 1.8189e-01, PNorm = 57.1511, GNorm = 0.7045, lr_0 = 8.4712e-04
Loss = 1.7582e-01, PNorm = 57.1692, GNorm = 1.4723, lr_0 = 8.4654e-04
Loss = 1.7746e-01, PNorm = 57.1973, GNorm = 0.8487, lr_0 = 8.4596e-04
Loss = 1.8391e-01, PNorm = 57.2229, GNorm = 1.5076, lr_0 = 8.4538e-04
Loss = 1.6296e-01, PNorm = 57.2484, GNorm = 1.6036, lr_0 = 8.4480e-04
Loss = 1.9295e-01, PNorm = 57.2676, GNorm = 1.8019, lr_0 = 8.4423e-04
Loss = 2.2326e-01, PNorm = 57.2996, GNorm = 1.1275, lr_0 = 8.4365e-04
Loss = 1.9279e-01, PNorm = 57.3335, GNorm = 1.2867, lr_0 = 8.4307e-04
Loss = 1.7863e-01, PNorm = 57.3594, GNorm = 0.5930, lr_0 = 8.4249e-04
Loss = 1.9235e-01, PNorm = 57.3847, GNorm = 1.5157, lr_0 = 8.4191e-04
Loss = 2.0627e-01, PNorm = 57.4162, GNorm = 1.5973, lr_0 = 8.4134e-04
Loss = 1.9931e-01, PNorm = 57.4412, GNorm = 0.5374, lr_0 = 8.4076e-04
Loss = 1.7063e-01, PNorm = 57.4655, GNorm = 1.1317, lr_0 = 8.4019e-04
Loss = 1.9882e-01, PNorm = 57.4815, GNorm = 2.1719, lr_0 = 8.3961e-04
Loss = 1.9436e-01, PNorm = 57.5165, GNorm = 1.4745, lr_0 = 8.3903e-04
Loss = 1.8965e-01, PNorm = 57.5360, GNorm = 0.8507, lr_0 = 8.3846e-04
Loss = 1.8568e-01, PNorm = 57.5591, GNorm = 0.8695, lr_0 = 8.3789e-04
Loss = 1.8015e-01, PNorm = 57.5773, GNorm = 1.3514, lr_0 = 8.3731e-04
Loss = 1.8501e-01, PNorm = 57.6013, GNorm = 0.9073, lr_0 = 8.3674e-04
Loss = 1.7179e-01, PNorm = 57.6343, GNorm = 0.6857, lr_0 = 8.3616e-04
Loss = 1.8860e-01, PNorm = 57.6597, GNorm = 0.5905, lr_0 = 8.3559e-04
Loss = 1.8332e-01, PNorm = 57.6739, GNorm = 0.8850, lr_0 = 8.3502e-04
Loss = 1.8351e-01, PNorm = 57.6955, GNorm = 1.2280, lr_0 = 8.3445e-04
Loss = 1.9205e-01, PNorm = 57.7203, GNorm = 2.0800, lr_0 = 8.3388e-04
Loss = 1.9106e-01, PNorm = 57.7394, GNorm = 2.2390, lr_0 = 8.3330e-04
Loss = 1.7527e-01, PNorm = 57.7697, GNorm = 1.6564, lr_0 = 8.3273e-04
Loss = 1.8244e-01, PNorm = 57.7876, GNorm = 2.1338, lr_0 = 8.3216e-04
Loss = 2.0734e-01, PNorm = 57.8117, GNorm = 0.5581, lr_0 = 8.3159e-04
Loss = 1.8490e-01, PNorm = 57.8446, GNorm = 2.4289, lr_0 = 8.3102e-04
Loss = 1.9870e-01, PNorm = 57.8687, GNorm = 0.6458, lr_0 = 8.3045e-04
Loss = 2.0146e-01, PNorm = 57.8892, GNorm = 1.0766, lr_0 = 8.2988e-04
Loss = 2.0098e-01, PNorm = 57.9177, GNorm = 0.7199, lr_0 = 8.2932e-04
Loss = 2.0721e-01, PNorm = 57.9449, GNorm = 0.9043, lr_0 = 8.2875e-04
Loss = 2.1204e-01, PNorm = 57.9657, GNorm = 1.8721, lr_0 = 8.2818e-04
Loss = 1.7944e-01, PNorm = 57.9933, GNorm = 0.7003, lr_0 = 8.2761e-04
Loss = 1.9874e-01, PNorm = 58.0194, GNorm = 0.9534, lr_0 = 8.2705e-04
Loss = 1.8090e-01, PNorm = 58.0311, GNorm = 0.9048, lr_0 = 8.2648e-04
Loss = 1.8528e-01, PNorm = 58.0484, GNorm = 0.8660, lr_0 = 8.2591e-04
Loss = 1.8570e-01, PNorm = 58.0730, GNorm = 1.1818, lr_0 = 8.2535e-04
Loss = 1.8915e-01, PNorm = 58.0867, GNorm = 1.1406, lr_0 = 8.2478e-04
Loss = 2.1039e-01, PNorm = 58.1159, GNorm = 1.2632, lr_0 = 8.2422e-04
Loss = 1.7794e-01, PNorm = 58.1410, GNorm = 1.8864, lr_0 = 8.2365e-04
Loss = 2.0319e-01, PNorm = 58.1720, GNorm = 1.7934, lr_0 = 8.2309e-04
Loss = 1.9088e-01, PNorm = 58.1964, GNorm = 0.8170, lr_0 = 8.2252e-04
Loss = 1.8681e-01, PNorm = 58.2200, GNorm = 1.2151, lr_0 = 8.2196e-04
Loss = 1.8112e-01, PNorm = 58.2454, GNorm = 2.2400, lr_0 = 8.2140e-04
Loss = 1.9353e-01, PNorm = 58.2684, GNorm = 1.7742, lr_0 = 8.2084e-04
Loss = 1.8962e-01, PNorm = 58.2915, GNorm = 0.7564, lr_0 = 8.2027e-04
Loss = 1.9021e-01, PNorm = 58.3124, GNorm = 0.8469, lr_0 = 8.1971e-04
Loss = 1.9478e-01, PNorm = 58.3270, GNorm = 2.0602, lr_0 = 8.1915e-04
Loss = 1.7018e-01, PNorm = 58.3496, GNorm = 2.4130, lr_0 = 8.1859e-04
Loss = 1.9253e-01, PNorm = 58.3690, GNorm = 0.8697, lr_0 = 8.1803e-04
Loss = 1.7635e-01, PNorm = 58.3969, GNorm = 1.0902, lr_0 = 8.1747e-04
Loss = 2.2636e-01, PNorm = 58.4220, GNorm = 1.7307, lr_0 = 8.1691e-04
Loss = 1.8725e-01, PNorm = 58.4483, GNorm = 0.7846, lr_0 = 8.1635e-04
Loss = 1.9620e-01, PNorm = 58.4738, GNorm = 1.1792, lr_0 = 8.1579e-04
Loss = 1.8523e-01, PNorm = 58.4988, GNorm = 1.2501, lr_0 = 8.1523e-04
Loss = 1.8026e-01, PNorm = 58.5204, GNorm = 0.5766, lr_0 = 8.1467e-04
Loss = 1.7732e-01, PNorm = 58.5404, GNorm = 0.8510, lr_0 = 8.1411e-04
Loss = 1.7576e-01, PNorm = 58.5631, GNorm = 1.0425, lr_0 = 8.1355e-04
Loss = 1.8742e-01, PNorm = 58.5846, GNorm = 0.8813, lr_0 = 8.1300e-04
Loss = 1.9436e-01, PNorm = 58.6030, GNorm = 1.2834, lr_0 = 8.1244e-04
Loss = 2.0250e-01, PNorm = 58.6294, GNorm = 2.3399, lr_0 = 8.1188e-04
Loss = 1.8238e-01, PNorm = 58.6585, GNorm = 0.7741, lr_0 = 8.1133e-04
Loss = 1.8616e-01, PNorm = 58.6863, GNorm = 0.7060, lr_0 = 8.1077e-04
Loss = 1.7058e-01, PNorm = 58.7042, GNorm = 0.9053, lr_0 = 8.1022e-04
Loss = 1.5989e-01, PNorm = 58.7246, GNorm = 0.6436, lr_0 = 8.0966e-04
Loss = 2.0510e-01, PNorm = 58.7464, GNorm = 0.7566, lr_0 = 8.0911e-04
Loss = 2.0303e-01, PNorm = 58.7797, GNorm = 2.8812, lr_0 = 8.0855e-04
Loss = 2.0447e-01, PNorm = 58.8053, GNorm = 1.9991, lr_0 = 8.0800e-04
Loss = 1.9145e-01, PNorm = 58.8332, GNorm = 0.7049, lr_0 = 8.0745e-04
Loss = 2.1512e-01, PNorm = 58.8553, GNorm = 2.0568, lr_0 = 8.0689e-04
Loss = 2.2053e-01, PNorm = 58.8947, GNorm = 1.5873, lr_0 = 8.0634e-04
Loss = 2.0270e-01, PNorm = 58.9291, GNorm = 1.0628, lr_0 = 8.0579e-04
Loss = 1.7378e-01, PNorm = 58.9633, GNorm = 0.6761, lr_0 = 8.0523e-04
Loss = 1.8320e-01, PNorm = 58.9887, GNorm = 0.7090, lr_0 = 8.0468e-04
Loss = 2.0582e-01, PNorm = 59.0113, GNorm = 0.7627, lr_0 = 8.0413e-04
Loss = 1.7849e-01, PNorm = 59.0351, GNorm = 1.1218, lr_0 = 8.0358e-04
Loss = 2.0129e-01, PNorm = 59.0685, GNorm = 0.7610, lr_0 = 8.0303e-04
Loss = 1.7715e-01, PNorm = 59.0958, GNorm = 0.8115, lr_0 = 8.0248e-04
Loss = 1.7607e-01, PNorm = 59.1239, GNorm = 0.8003, lr_0 = 8.0193e-04
Loss = 1.7299e-01, PNorm = 59.1493, GNorm = 0.6234, lr_0 = 8.0138e-04
Loss = 1.6711e-01, PNorm = 59.1673, GNorm = 1.1745, lr_0 = 8.0083e-04
Loss = 1.8251e-01, PNorm = 59.1912, GNorm = 0.5436, lr_0 = 8.0028e-04
Loss = 1.6940e-01, PNorm = 59.2040, GNorm = 0.7564, lr_0 = 7.9974e-04
Loss = 1.9777e-01, PNorm = 59.2251, GNorm = 1.1628, lr_0 = 7.9919e-04
Loss = 1.7118e-01, PNorm = 59.2471, GNorm = 2.2773, lr_0 = 7.9864e-04
Loss = 1.9523e-01, PNorm = 59.2759, GNorm = 1.2975, lr_0 = 7.9809e-04
Loss = 1.8924e-01, PNorm = 59.3065, GNorm = 1.0343, lr_0 = 7.9755e-04
Loss = 1.8973e-01, PNorm = 59.3339, GNorm = 0.7957, lr_0 = 7.9700e-04
Loss = 1.6667e-01, PNorm = 59.3654, GNorm = 1.1765, lr_0 = 7.9645e-04
Loss = 1.9045e-01, PNorm = 59.3830, GNorm = 1.0198, lr_0 = 7.9591e-04
Loss = 1.6853e-01, PNorm = 59.4029, GNorm = 1.0654, lr_0 = 7.9536e-04
Loss = 1.9699e-01, PNorm = 59.4245, GNorm = 1.7672, lr_0 = 7.9482e-04
Loss = 1.8005e-01, PNorm = 59.4448, GNorm = 0.8337, lr_0 = 7.9427e-04
Loss = 2.0121e-01, PNorm = 59.4712, GNorm = 0.5607, lr_0 = 7.9373e-04
Loss = 1.8288e-01, PNorm = 59.4975, GNorm = 0.8927, lr_0 = 7.9319e-04
Loss = 1.7343e-01, PNorm = 59.5201, GNorm = 0.9031, lr_0 = 7.9264e-04
Loss = 2.3379e-01, PNorm = 59.5425, GNorm = 1.1666, lr_0 = 7.9210e-04
Loss = 2.1419e-01, PNorm = 59.5727, GNorm = 1.6320, lr_0 = 7.9156e-04
Loss = 1.7664e-01, PNorm = 59.6068, GNorm = 1.0917, lr_0 = 7.9101e-04
Loss = 1.9687e-01, PNorm = 59.6318, GNorm = 0.6133, lr_0 = 7.9047e-04
Loss = 1.8845e-01, PNorm = 59.6600, GNorm = 0.8845, lr_0 = 7.8993e-04
Loss = 1.9481e-01, PNorm = 59.6756, GNorm = 0.7804, lr_0 = 7.8939e-04
Loss = 1.9494e-01, PNorm = 59.6917, GNorm = 1.1853, lr_0 = 7.8885e-04
Loss = 1.9138e-01, PNorm = 59.7079, GNorm = 0.8512, lr_0 = 7.8831e-04
Loss = 1.7427e-01, PNorm = 59.7326, GNorm = 0.8796, lr_0 = 7.8777e-04
Loss = 1.7899e-01, PNorm = 59.7579, GNorm = 0.8190, lr_0 = 7.8723e-04
Loss = 1.8975e-01, PNorm = 59.7904, GNorm = 1.5370, lr_0 = 7.8669e-04
Loss = 1.9278e-01, PNorm = 59.8110, GNorm = 1.2168, lr_0 = 7.8615e-04
Loss = 1.7947e-01, PNorm = 59.8345, GNorm = 0.5500, lr_0 = 7.8561e-04
Loss = 1.9465e-01, PNorm = 59.8521, GNorm = 0.6424, lr_0 = 7.8507e-04
Loss = 2.1188e-01, PNorm = 59.8750, GNorm = 1.6445, lr_0 = 7.8454e-04
Loss = 1.8136e-01, PNorm = 59.9069, GNorm = 1.9918, lr_0 = 7.8400e-04
Loss = 1.7562e-01, PNorm = 59.9301, GNorm = 0.7064, lr_0 = 7.8346e-04
Loss = 1.5070e-01, PNorm = 59.9485, GNorm = 0.6698, lr_0 = 7.8293e-04
Loss = 1.8961e-01, PNorm = 59.9726, GNorm = 0.6484, lr_0 = 7.8239e-04
Loss = 2.2227e-01, PNorm = 59.9945, GNorm = 0.8379, lr_0 = 7.8185e-04
Loss = 2.0319e-01, PNorm = 60.0230, GNorm = 1.2340, lr_0 = 7.8132e-04
Validation mae = 0.252699
Epoch 5
Loss = 1.6938e-01, PNorm = 60.0504, GNorm = 0.5547, lr_0 = 7.8078e-04
Loss = 1.7548e-01, PNorm = 60.0784, GNorm = 1.5449, lr_0 = 7.8025e-04
Loss = 1.8560e-01, PNorm = 60.0978, GNorm = 0.7855, lr_0 = 7.7971e-04
Loss = 2.2255e-01, PNorm = 60.1262, GNorm = 1.4757, lr_0 = 7.7918e-04
Loss = 1.6970e-01, PNorm = 60.1521, GNorm = 1.8812, lr_0 = 7.7864e-04
Loss = 1.8507e-01, PNorm = 60.1702, GNorm = 1.0435, lr_0 = 7.7811e-04
Loss = 1.8267e-01, PNorm = 60.1977, GNorm = 1.5919, lr_0 = 7.7758e-04
Loss = 1.6553e-01, PNorm = 60.2217, GNorm = 1.0743, lr_0 = 7.7705e-04
Loss = 1.7065e-01, PNorm = 60.2443, GNorm = 0.7034, lr_0 = 7.7651e-04
Loss = 1.7301e-01, PNorm = 60.2625, GNorm = 0.6073, lr_0 = 7.7598e-04
Loss = 1.5201e-01, PNorm = 60.2804, GNorm = 1.0264, lr_0 = 7.7545e-04
Loss = 1.8728e-01, PNorm = 60.2998, GNorm = 0.7015, lr_0 = 7.7492e-04
Loss = 1.8657e-01, PNorm = 60.3277, GNorm = 0.9739, lr_0 = 7.7439e-04
Loss = 1.8294e-01, PNorm = 60.3475, GNorm = 0.8081, lr_0 = 7.7386e-04
Loss = 1.8884e-01, PNorm = 60.3664, GNorm = 0.6785, lr_0 = 7.7333e-04
Loss = 1.6081e-01, PNorm = 60.3969, GNorm = 0.3565, lr_0 = 7.7280e-04
Loss = 1.6571e-01, PNorm = 60.4260, GNorm = 0.8605, lr_0 = 7.7227e-04
Loss = 1.7135e-01, PNorm = 60.4550, GNorm = 0.9771, lr_0 = 7.7174e-04
Loss = 1.8209e-01, PNorm = 60.4859, GNorm = 1.0133, lr_0 = 7.7121e-04
Loss = 1.7387e-01, PNorm = 60.5082, GNorm = 0.9195, lr_0 = 7.7068e-04
Loss = 1.8893e-01, PNorm = 60.5379, GNorm = 2.1892, lr_0 = 7.7015e-04
Loss = 1.7058e-01, PNorm = 60.5607, GNorm = 0.8836, lr_0 = 7.6963e-04
Loss = 1.9553e-01, PNorm = 60.5901, GNorm = 1.5038, lr_0 = 7.6910e-04
Loss = 1.8085e-01, PNorm = 60.6177, GNorm = 0.6493, lr_0 = 7.6857e-04
Loss = 1.9039e-01, PNorm = 60.6457, GNorm = 1.1703, lr_0 = 7.6805e-04
Loss = 1.7903e-01, PNorm = 60.6701, GNorm = 1.4301, lr_0 = 7.6752e-04
Loss = 1.8013e-01, PNorm = 60.6977, GNorm = 0.9770, lr_0 = 7.6699e-04
Loss = 1.6459e-01, PNorm = 60.7201, GNorm = 0.6268, lr_0 = 7.6647e-04
Loss = 1.7738e-01, PNorm = 60.7444, GNorm = 1.4102, lr_0 = 7.6594e-04
Loss = 1.5590e-01, PNorm = 60.7705, GNorm = 1.0600, lr_0 = 7.6542e-04
Loss = 1.7407e-01, PNorm = 60.8007, GNorm = 1.2436, lr_0 = 7.6489e-04
Loss = 2.1600e-01, PNorm = 60.8344, GNorm = 1.4560, lr_0 = 7.6437e-04
Loss = 2.0536e-01, PNorm = 60.8682, GNorm = 1.8516, lr_0 = 7.6385e-04
Loss = 2.0845e-01, PNorm = 60.9112, GNorm = 2.7965, lr_0 = 7.6332e-04
Loss = 1.8055e-01, PNorm = 60.9494, GNorm = 0.6009, lr_0 = 7.6280e-04
Loss = 1.6317e-01, PNorm = 60.9719, GNorm = 0.5621, lr_0 = 7.6228e-04
Loss = 1.7727e-01, PNorm = 60.9936, GNorm = 0.7460, lr_0 = 7.6176e-04
Loss = 1.8518e-01, PNorm = 61.0192, GNorm = 1.0582, lr_0 = 7.6123e-04
Loss = 2.0035e-01, PNorm = 61.0443, GNorm = 0.9300, lr_0 = 7.6071e-04
Loss = 1.4491e-01, PNorm = 61.0675, GNorm = 0.5696, lr_0 = 7.6019e-04
Loss = 1.5836e-01, PNorm = 61.0884, GNorm = 0.8648, lr_0 = 7.5967e-04
Loss = 1.7675e-01, PNorm = 61.1127, GNorm = 0.7597, lr_0 = 7.5915e-04
Loss = 1.6941e-01, PNorm = 61.1246, GNorm = 0.7885, lr_0 = 7.5863e-04
Loss = 1.6029e-01, PNorm = 61.1474, GNorm = 0.8604, lr_0 = 7.5811e-04
Loss = 1.6966e-01, PNorm = 61.1671, GNorm = 0.6722, lr_0 = 7.5759e-04
Loss = 1.9206e-01, PNorm = 61.1903, GNorm = 0.6967, lr_0 = 7.5707e-04
Loss = 1.9615e-01, PNorm = 61.2099, GNorm = 1.3383, lr_0 = 7.5655e-04
Loss = 1.7277e-01, PNorm = 61.2358, GNorm = 1.4642, lr_0 = 7.5603e-04
Loss = 1.7846e-01, PNorm = 61.2538, GNorm = 0.9959, lr_0 = 7.5552e-04
Loss = 1.8418e-01, PNorm = 61.2777, GNorm = 1.0482, lr_0 = 7.5500e-04
Loss = 1.9364e-01, PNorm = 61.3071, GNorm = 1.3541, lr_0 = 7.5448e-04
Loss = 1.6229e-01, PNorm = 61.3439, GNorm = 1.1516, lr_0 = 7.5397e-04
Loss = 1.7460e-01, PNorm = 61.3668, GNorm = 1.0524, lr_0 = 7.5345e-04
Loss = 1.8051e-01, PNorm = 61.3885, GNorm = 2.2307, lr_0 = 7.5293e-04
Loss = 1.8373e-01, PNorm = 61.4065, GNorm = 0.9490, lr_0 = 7.5242e-04
Loss = 1.9600e-01, PNorm = 61.4305, GNorm = 1.1661, lr_0 = 7.5190e-04
Loss = 1.6381e-01, PNorm = 61.4525, GNorm = 0.9439, lr_0 = 7.5139e-04
Loss = 1.9807e-01, PNorm = 61.4748, GNorm = 0.8828, lr_0 = 7.5087e-04
Loss = 1.9846e-01, PNorm = 61.5005, GNorm = 0.9512, lr_0 = 7.5036e-04
Loss = 1.9019e-01, PNorm = 61.5331, GNorm = 1.0337, lr_0 = 7.4984e-04
Loss = 1.7851e-01, PNorm = 61.5638, GNorm = 1.0065, lr_0 = 7.4933e-04
Loss = 1.7276e-01, PNorm = 61.5891, GNorm = 0.8285, lr_0 = 7.4882e-04
Loss = 1.7394e-01, PNorm = 61.6162, GNorm = 0.9678, lr_0 = 7.4830e-04
Loss = 1.7395e-01, PNorm = 61.6343, GNorm = 1.0010, lr_0 = 7.4779e-04
Loss = 2.1408e-01, PNorm = 61.6591, GNorm = 1.1574, lr_0 = 7.4728e-04
Loss = 1.7461e-01, PNorm = 61.6792, GNorm = 1.0505, lr_0 = 7.4677e-04
Loss = 2.0902e-01, PNorm = 61.7065, GNorm = 1.0919, lr_0 = 7.4625e-04
Loss = 1.9862e-01, PNorm = 61.7313, GNorm = 1.6158, lr_0 = 7.4574e-04
Loss = 1.9813e-01, PNorm = 61.7518, GNorm = 2.0531, lr_0 = 7.4523e-04
Loss = 1.7731e-01, PNorm = 61.7686, GNorm = 0.8184, lr_0 = 7.4472e-04
Loss = 1.6799e-01, PNorm = 61.7936, GNorm = 0.6167, lr_0 = 7.4421e-04
Loss = 1.6806e-01, PNorm = 61.8131, GNorm = 2.0366, lr_0 = 7.4370e-04
Loss = 2.2675e-01, PNorm = 61.8269, GNorm = 0.7291, lr_0 = 7.4319e-04
Loss = 1.7607e-01, PNorm = 61.8633, GNorm = 0.9237, lr_0 = 7.4268e-04
Loss = 1.7803e-01, PNorm = 61.8954, GNorm = 1.1957, lr_0 = 7.4217e-04
Loss = 1.6195e-01, PNorm = 61.9121, GNorm = 0.5761, lr_0 = 7.4167e-04
Loss = 1.7237e-01, PNorm = 61.9244, GNorm = 1.6323, lr_0 = 7.4116e-04
Loss = 1.9706e-01, PNorm = 61.9400, GNorm = 1.0605, lr_0 = 7.4065e-04
Loss = 1.9424e-01, PNorm = 61.9597, GNorm = 0.7203, lr_0 = 7.4014e-04
Loss = 1.5135e-01, PNorm = 61.9828, GNorm = 0.6423, lr_0 = 7.3964e-04
Loss = 1.5197e-01, PNorm = 62.0096, GNorm = 0.5227, lr_0 = 7.3913e-04
Loss = 1.6907e-01, PNorm = 62.0262, GNorm = 0.8784, lr_0 = 7.3862e-04
Loss = 1.7496e-01, PNorm = 62.0449, GNorm = 0.7152, lr_0 = 7.3812e-04
Loss = 1.9882e-01, PNorm = 62.0780, GNorm = 1.4011, lr_0 = 7.3761e-04
Loss = 1.4516e-01, PNorm = 62.1031, GNorm = 0.7706, lr_0 = 7.3711e-04
Loss = 1.6198e-01, PNorm = 62.1286, GNorm = 0.6012, lr_0 = 7.3660e-04
Loss = 1.7854e-01, PNorm = 62.1373, GNorm = 0.6437, lr_0 = 7.3610e-04
Loss = 1.7653e-01, PNorm = 62.1540, GNorm = 0.5237, lr_0 = 7.3559e-04
Loss = 2.0134e-01, PNorm = 62.1749, GNorm = 0.6037, lr_0 = 7.3509e-04
Loss = 1.8274e-01, PNorm = 62.2022, GNorm = 1.9240, lr_0 = 7.3458e-04
Loss = 1.7475e-01, PNorm = 62.2276, GNorm = 1.9147, lr_0 = 7.3408e-04
Loss = 1.7664e-01, PNorm = 62.2565, GNorm = 0.8348, lr_0 = 7.3358e-04
Loss = 1.6721e-01, PNorm = 62.2792, GNorm = 0.5987, lr_0 = 7.3308e-04
Loss = 1.6781e-01, PNorm = 62.2975, GNorm = 0.8759, lr_0 = 7.3257e-04
Loss = 2.0004e-01, PNorm = 62.3197, GNorm = 0.6601, lr_0 = 7.3207e-04
Loss = 1.6693e-01, PNorm = 62.3467, GNorm = 0.8327, lr_0 = 7.3157e-04
Loss = 1.6090e-01, PNorm = 62.3674, GNorm = 1.0579, lr_0 = 7.3107e-04
Loss = 1.7227e-01, PNorm = 62.3890, GNorm = 0.6875, lr_0 = 7.3057e-04
Loss = 1.7557e-01, PNorm = 62.4105, GNorm = 0.6484, lr_0 = 7.3007e-04
Loss = 1.5101e-01, PNorm = 62.4274, GNorm = 0.8257, lr_0 = 7.2957e-04
Loss = 1.4477e-01, PNorm = 62.4465, GNorm = 1.1761, lr_0 = 7.2907e-04
Loss = 2.0640e-01, PNorm = 62.4695, GNorm = 0.7825, lr_0 = 7.2857e-04
Loss = 1.7520e-01, PNorm = 62.4972, GNorm = 1.0339, lr_0 = 7.2807e-04
Loss = 2.0453e-01, PNorm = 62.5183, GNorm = 1.0224, lr_0 = 7.2757e-04
Loss = 1.6078e-01, PNorm = 62.5460, GNorm = 0.8448, lr_0 = 7.2707e-04
Loss = 1.7629e-01, PNorm = 62.5752, GNorm = 0.9802, lr_0 = 7.2657e-04
Loss = 1.6154e-01, PNorm = 62.6089, GNorm = 0.6067, lr_0 = 7.2608e-04
Loss = 1.7534e-01, PNorm = 62.6380, GNorm = 1.5524, lr_0 = 7.2558e-04
Loss = 1.8555e-01, PNorm = 62.6622, GNorm = 0.7997, lr_0 = 7.2508e-04
Loss = 1.7449e-01, PNorm = 62.6842, GNorm = 1.7922, lr_0 = 7.2458e-04
Loss = 1.7226e-01, PNorm = 62.7063, GNorm = 2.3343, lr_0 = 7.2409e-04
Loss = 1.9485e-01, PNorm = 62.7304, GNorm = 1.0197, lr_0 = 7.2359e-04
Loss = 1.8572e-01, PNorm = 62.7511, GNorm = 1.0977, lr_0 = 7.2310e-04
Loss = 1.5071e-01, PNorm = 62.7693, GNorm = 0.8528, lr_0 = 7.2260e-04
Loss = 1.8924e-01, PNorm = 62.7877, GNorm = 0.8600, lr_0 = 7.2211e-04
Loss = 1.5411e-01, PNorm = 62.8078, GNorm = 0.7465, lr_0 = 7.2161e-04
Loss = 1.8909e-01, PNorm = 62.8256, GNorm = 0.7448, lr_0 = 7.2112e-04
Loss = 1.7168e-01, PNorm = 62.8446, GNorm = 1.1197, lr_0 = 7.2062e-04
Loss = 1.6048e-01, PNorm = 62.8675, GNorm = 1.4428, lr_0 = 7.2013e-04
Loss = 1.8573e-01, PNorm = 62.8830, GNorm = 0.6538, lr_0 = 7.1964e-04
Validation mae = 0.252415
Epoch 6
Loss = 1.5637e-01, PNorm = 62.9047, GNorm = 1.0721, lr_0 = 7.1914e-04
Loss = 1.6903e-01, PNorm = 62.9346, GNorm = 1.5975, lr_0 = 7.1865e-04
Loss = 1.5300e-01, PNorm = 62.9528, GNorm = 0.6981, lr_0 = 7.1816e-04
Loss = 1.6081e-01, PNorm = 62.9750, GNorm = 1.3993, lr_0 = 7.1767e-04
Loss = 1.4637e-01, PNorm = 63.0022, GNorm = 0.6452, lr_0 = 7.1717e-04
Loss = 1.4610e-01, PNorm = 63.0207, GNorm = 0.8333, lr_0 = 7.1668e-04
Loss = 1.7977e-01, PNorm = 63.0384, GNorm = 0.7361, lr_0 = 7.1619e-04
Loss = 1.5186e-01, PNorm = 63.0643, GNorm = 0.6867, lr_0 = 7.1570e-04
Loss = 1.5955e-01, PNorm = 63.0834, GNorm = 0.8695, lr_0 = 7.1521e-04
Loss = 1.5957e-01, PNorm = 63.1067, GNorm = 0.6685, lr_0 = 7.1472e-04
Loss = 2.1519e-01, PNorm = 63.1289, GNorm = 1.1194, lr_0 = 7.1423e-04
Loss = 1.4368e-01, PNorm = 63.1586, GNorm = 1.2210, lr_0 = 7.1374e-04
Loss = 1.7835e-01, PNorm = 63.1794, GNorm = 2.3795, lr_0 = 7.1325e-04
Loss = 1.6485e-01, PNorm = 63.2014, GNorm = 1.7297, lr_0 = 7.1277e-04
Loss = 1.6529e-01, PNorm = 63.2206, GNorm = 0.5094, lr_0 = 7.1228e-04
Loss = 1.5303e-01, PNorm = 63.2463, GNorm = 1.2176, lr_0 = 7.1179e-04
Loss = 1.8878e-01, PNorm = 63.2590, GNorm = 1.7750, lr_0 = 7.1130e-04
Loss = 1.5124e-01, PNorm = 63.2831, GNorm = 0.7693, lr_0 = 7.1081e-04
Loss = 1.6426e-01, PNorm = 63.3113, GNorm = 1.2722, lr_0 = 7.1033e-04
Loss = 1.5446e-01, PNorm = 63.3373, GNorm = 0.5292, lr_0 = 7.0984e-04
Loss = 1.6524e-01, PNorm = 63.3648, GNorm = 0.7235, lr_0 = 7.0935e-04
Loss = 1.5267e-01, PNorm = 63.3893, GNorm = 0.7598, lr_0 = 7.0887e-04
Loss = 1.7386e-01, PNorm = 63.4093, GNorm = 0.7831, lr_0 = 7.0838e-04
Loss = 1.5185e-01, PNorm = 63.4298, GNorm = 0.7329, lr_0 = 7.0790e-04
Loss = 1.4913e-01, PNorm = 63.4512, GNorm = 0.5957, lr_0 = 7.0741e-04
Loss = 1.6598e-01, PNorm = 63.4782, GNorm = 0.6189, lr_0 = 7.0693e-04
Loss = 1.4773e-01, PNorm = 63.4981, GNorm = 0.9544, lr_0 = 7.0644e-04
Loss = 1.5365e-01, PNorm = 63.5128, GNorm = 0.5437, lr_0 = 7.0596e-04
Loss = 1.7570e-01, PNorm = 63.5353, GNorm = 1.5779, lr_0 = 7.0548e-04
Loss = 1.5719e-01, PNorm = 63.5680, GNorm = 0.7066, lr_0 = 7.0499e-04
Loss = 1.4900e-01, PNorm = 63.5975, GNorm = 1.7593, lr_0 = 7.0451e-04
Loss = 1.6089e-01, PNorm = 63.6199, GNorm = 1.3111, lr_0 = 7.0403e-04
Loss = 1.5762e-01, PNorm = 63.6385, GNorm = 0.6757, lr_0 = 7.0354e-04
Loss = 1.6625e-01, PNorm = 63.6527, GNorm = 0.7222, lr_0 = 7.0306e-04
Loss = 1.5400e-01, PNorm = 63.6765, GNorm = 0.7104, lr_0 = 7.0258e-04
Loss = 1.7278e-01, PNorm = 63.6957, GNorm = 1.0223, lr_0 = 7.0210e-04
Loss = 1.5856e-01, PNorm = 63.7123, GNorm = 0.7356, lr_0 = 7.0162e-04
Loss = 1.5378e-01, PNorm = 63.7369, GNorm = 1.1005, lr_0 = 7.0114e-04
Loss = 1.6397e-01, PNorm = 63.7619, GNorm = 0.8584, lr_0 = 7.0066e-04
Loss = 1.8607e-01, PNorm = 63.7822, GNorm = 0.6357, lr_0 = 7.0018e-04
Loss = 1.3815e-01, PNorm = 63.8075, GNorm = 0.4589, lr_0 = 6.9970e-04
Loss = 1.7190e-01, PNorm = 63.8322, GNorm = 0.7550, lr_0 = 6.9922e-04
Loss = 1.5931e-01, PNorm = 63.8509, GNorm = 0.5869, lr_0 = 6.9874e-04
Loss = 1.7204e-01, PNorm = 63.8765, GNorm = 0.5659, lr_0 = 6.9826e-04
Loss = 1.7759e-01, PNorm = 63.8934, GNorm = 0.7986, lr_0 = 6.9778e-04
Loss = 1.9584e-01, PNorm = 63.9157, GNorm = 1.5157, lr_0 = 6.9730e-04
Loss = 1.5291e-01, PNorm = 63.9465, GNorm = 0.7357, lr_0 = 6.9683e-04
Loss = 1.6760e-01, PNorm = 63.9752, GNorm = 0.6659, lr_0 = 6.9635e-04
Loss = 1.5237e-01, PNorm = 64.0013, GNorm = 1.1211, lr_0 = 6.9587e-04
Loss = 1.6464e-01, PNorm = 64.0284, GNorm = 1.6602, lr_0 = 6.9540e-04
Loss = 1.7957e-01, PNorm = 64.0505, GNorm = 1.2842, lr_0 = 6.9492e-04
Loss = 1.5313e-01, PNorm = 64.0710, GNorm = 1.1087, lr_0 = 6.9444e-04
Loss = 1.8011e-01, PNorm = 64.0995, GNorm = 1.5431, lr_0 = 6.9397e-04
Loss = 1.7803e-01, PNorm = 64.1317, GNorm = 1.3131, lr_0 = 6.9349e-04
Loss = 1.7667e-01, PNorm = 64.1584, GNorm = 0.6936, lr_0 = 6.9302e-04
Loss = 1.8357e-01, PNorm = 64.1782, GNorm = 1.1177, lr_0 = 6.9254e-04
Loss = 1.8198e-01, PNorm = 64.2003, GNorm = 0.4767, lr_0 = 6.9207e-04
Loss = 1.5756e-01, PNorm = 64.2234, GNorm = 0.9751, lr_0 = 6.9159e-04
Loss = 1.5401e-01, PNorm = 64.2432, GNorm = 0.6713, lr_0 = 6.9112e-04
Loss = 1.9362e-01, PNorm = 64.2674, GNorm = 1.3926, lr_0 = 6.9065e-04
Loss = 1.7385e-01, PNorm = 64.2941, GNorm = 0.9270, lr_0 = 6.9017e-04
Loss = 1.5702e-01, PNorm = 64.3221, GNorm = 1.0336, lr_0 = 6.8970e-04
Loss = 1.6720e-01, PNorm = 64.3465, GNorm = 0.7468, lr_0 = 6.8923e-04
Loss = 1.6346e-01, PNorm = 64.3738, GNorm = 1.2271, lr_0 = 6.8876e-04
Loss = 1.6369e-01, PNorm = 64.3984, GNorm = 0.7198, lr_0 = 6.8828e-04
Loss = 1.7951e-01, PNorm = 64.4248, GNorm = 1.9134, lr_0 = 6.8781e-04
Loss = 1.5965e-01, PNorm = 64.4577, GNorm = 1.3513, lr_0 = 6.8734e-04
Loss = 1.5654e-01, PNorm = 64.4851, GNorm = 2.1795, lr_0 = 6.8687e-04
Loss = 1.6247e-01, PNorm = 64.5101, GNorm = 0.8310, lr_0 = 6.8640e-04
Loss = 1.6461e-01, PNorm = 64.5261, GNorm = 0.7740, lr_0 = 6.8593e-04
Loss = 1.6369e-01, PNorm = 64.5580, GNorm = 1.0709, lr_0 = 6.8546e-04
Loss = 1.5191e-01, PNorm = 64.5781, GNorm = 0.6576, lr_0 = 6.8499e-04
Loss = 1.6513e-01, PNorm = 64.6048, GNorm = 1.4709, lr_0 = 6.8452e-04
Loss = 1.8324e-01, PNorm = 64.6258, GNorm = 1.2850, lr_0 = 6.8405e-04
Loss = 1.6027e-01, PNorm = 64.6481, GNorm = 0.9225, lr_0 = 6.8358e-04
Loss = 1.3902e-01, PNorm = 64.6585, GNorm = 0.9251, lr_0 = 6.8312e-04
Loss = 1.5787e-01, PNorm = 64.6705, GNorm = 1.3841, lr_0 = 6.8265e-04
Loss = 1.5940e-01, PNorm = 64.6914, GNorm = 0.7976, lr_0 = 6.8218e-04
Loss = 1.8047e-01, PNorm = 64.7120, GNorm = 1.3042, lr_0 = 6.8171e-04
Loss = 1.7730e-01, PNorm = 64.7369, GNorm = 0.8683, lr_0 = 6.8125e-04
Loss = 1.5580e-01, PNorm = 64.7592, GNorm = 0.9612, lr_0 = 6.8078e-04
Loss = 1.5257e-01, PNorm = 64.7775, GNorm = 0.7934, lr_0 = 6.8031e-04
Loss = 1.6068e-01, PNorm = 64.7885, GNorm = 0.8119, lr_0 = 6.7985e-04
Loss = 1.6882e-01, PNorm = 64.8052, GNorm = 0.6875, lr_0 = 6.7938e-04
Loss = 1.7383e-01, PNorm = 64.8372, GNorm = 1.0325, lr_0 = 6.7892e-04
Loss = 1.7076e-01, PNorm = 64.8690, GNorm = 0.8585, lr_0 = 6.7845e-04
Loss = 1.8774e-01, PNorm = 64.8885, GNorm = 0.8922, lr_0 = 6.7799e-04
Loss = 1.6325e-01, PNorm = 64.9114, GNorm = 1.5521, lr_0 = 6.7752e-04
Loss = 1.6372e-01, PNorm = 64.9365, GNorm = 0.8489, lr_0 = 6.7706e-04
Loss = 1.6976e-01, PNorm = 64.9585, GNorm = 0.7577, lr_0 = 6.7659e-04
Loss = 1.6935e-01, PNorm = 64.9751, GNorm = 0.6494, lr_0 = 6.7613e-04
Loss = 1.8043e-01, PNorm = 64.9958, GNorm = 0.7488, lr_0 = 6.7567e-04
Loss = 1.8256e-01, PNorm = 65.0155, GNorm = 0.9940, lr_0 = 6.7520e-04
Loss = 1.6650e-01, PNorm = 65.0396, GNorm = 0.7329, lr_0 = 6.7474e-04
Loss = 1.6964e-01, PNorm = 65.0497, GNorm = 0.6486, lr_0 = 6.7428e-04
Loss = 1.6486e-01, PNorm = 65.0632, GNorm = 1.1056, lr_0 = 6.7382e-04
Loss = 1.9469e-01, PNorm = 65.0857, GNorm = 1.2415, lr_0 = 6.7335e-04
Loss = 1.6752e-01, PNorm = 65.1102, GNorm = 0.6442, lr_0 = 6.7289e-04
Loss = 1.7482e-01, PNorm = 65.1405, GNorm = 0.9151, lr_0 = 6.7243e-04
Loss = 1.6707e-01, PNorm = 65.1640, GNorm = 1.1257, lr_0 = 6.7197e-04
Loss = 1.7631e-01, PNorm = 65.1901, GNorm = 1.0208, lr_0 = 6.7151e-04
Loss = 1.5881e-01, PNorm = 65.2110, GNorm = 0.6970, lr_0 = 6.7105e-04
Loss = 1.7246e-01, PNorm = 65.2268, GNorm = 1.2762, lr_0 = 6.7059e-04
Loss = 1.6405e-01, PNorm = 65.2415, GNorm = 1.2812, lr_0 = 6.7013e-04
Loss = 1.5501e-01, PNorm = 65.2585, GNorm = 0.6198, lr_0 = 6.6967e-04
Loss = 1.5803e-01, PNorm = 65.2795, GNorm = 0.9700, lr_0 = 6.6921e-04
Loss = 1.6002e-01, PNorm = 65.2928, GNorm = 0.5811, lr_0 = 6.6876e-04
Loss = 1.6715e-01, PNorm = 65.3124, GNorm = 0.6389, lr_0 = 6.6830e-04
Loss = 1.5767e-01, PNorm = 65.3339, GNorm = 0.5200, lr_0 = 6.6784e-04
Loss = 1.7634e-01, PNorm = 65.3632, GNorm = 1.5160, lr_0 = 6.6738e-04
Loss = 1.9497e-01, PNorm = 65.3844, GNorm = 1.4726, lr_0 = 6.6693e-04
Loss = 1.9776e-01, PNorm = 65.4064, GNorm = 0.8047, lr_0 = 6.6647e-04
Loss = 1.9035e-01, PNorm = 65.4286, GNorm = 1.2943, lr_0 = 6.6601e-04
Loss = 1.7764e-01, PNorm = 65.4504, GNorm = 1.1803, lr_0 = 6.6556e-04
Loss = 1.6558e-01, PNorm = 65.4827, GNorm = 1.7100, lr_0 = 6.6510e-04
Loss = 1.5747e-01, PNorm = 65.5093, GNorm = 0.9450, lr_0 = 6.6464e-04
Loss = 1.5557e-01, PNorm = 65.5307, GNorm = 0.8355, lr_0 = 6.6419e-04
Loss = 1.5574e-01, PNorm = 65.5518, GNorm = 0.6321, lr_0 = 6.6373e-04
Loss = 1.7813e-01, PNorm = 65.5669, GNorm = 0.8950, lr_0 = 6.6328e-04
Loss = 1.4490e-01, PNorm = 65.5796, GNorm = 0.5791, lr_0 = 6.6282e-04
Validation mae = 0.246602
Epoch 7
Loss = 1.6972e-01, PNorm = 65.6039, GNorm = 0.8018, lr_0 = 6.6237e-04
Loss = 1.5176e-01, PNorm = 65.6267, GNorm = 0.7125, lr_0 = 6.6192e-04
Loss = 1.6677e-01, PNorm = 65.6496, GNorm = 0.8361, lr_0 = 6.6146e-04
Loss = 1.4973e-01, PNorm = 65.6655, GNorm = 0.5299, lr_0 = 6.6101e-04
Loss = 1.4415e-01, PNorm = 65.6811, GNorm = 1.2598, lr_0 = 6.6056e-04
Loss = 1.4738e-01, PNorm = 65.6967, GNorm = 0.8502, lr_0 = 6.6011e-04
Loss = 1.5548e-01, PNorm = 65.7146, GNorm = 0.9119, lr_0 = 6.5965e-04
Loss = 1.4738e-01, PNorm = 65.7308, GNorm = 0.8627, lr_0 = 6.5920e-04
Loss = 1.4584e-01, PNorm = 65.7541, GNorm = 0.9303, lr_0 = 6.5875e-04
Loss = 1.6144e-01, PNorm = 65.7698, GNorm = 0.5249, lr_0 = 6.5830e-04
Loss = 1.4944e-01, PNorm = 65.7925, GNorm = 0.6087, lr_0 = 6.5785e-04
Loss = 1.5293e-01, PNorm = 65.8094, GNorm = 1.5828, lr_0 = 6.5740e-04
Loss = 1.3424e-01, PNorm = 65.8271, GNorm = 0.7402, lr_0 = 6.5695e-04
Loss = 1.6878e-01, PNorm = 65.8502, GNorm = 0.6985, lr_0 = 6.5650e-04
Loss = 1.5634e-01, PNorm = 65.8732, GNorm = 0.6416, lr_0 = 6.5605e-04
Loss = 1.6657e-01, PNorm = 65.8894, GNorm = 2.1941, lr_0 = 6.5560e-04
Loss = 1.4068e-01, PNorm = 65.8994, GNorm = 0.7326, lr_0 = 6.5515e-04
Loss = 1.8308e-01, PNorm = 65.9208, GNorm = 0.7400, lr_0 = 6.5470e-04
Loss = 1.5664e-01, PNorm = 65.9459, GNorm = 0.6088, lr_0 = 6.5425e-04
Loss = 1.6557e-01, PNorm = 65.9637, GNorm = 1.3968, lr_0 = 6.5380e-04
Loss = 1.3935e-01, PNorm = 65.9869, GNorm = 0.5725, lr_0 = 6.5335e-04
Loss = 1.4543e-01, PNorm = 66.0060, GNorm = 0.6347, lr_0 = 6.5291e-04
Loss = 1.3844e-01, PNorm = 66.0293, GNorm = 0.8842, lr_0 = 6.5246e-04
Loss = 1.3668e-01, PNorm = 66.0455, GNorm = 0.6628, lr_0 = 6.5201e-04
Loss = 1.3502e-01, PNorm = 66.0617, GNorm = 0.7074, lr_0 = 6.5157e-04
Loss = 1.4453e-01, PNorm = 66.0709, GNorm = 0.9893, lr_0 = 6.5112e-04
Loss = 1.3141e-01, PNorm = 66.0853, GNorm = 0.8246, lr_0 = 6.5067e-04
Loss = 1.6228e-01, PNorm = 66.1057, GNorm = 1.0059, lr_0 = 6.5023e-04
Loss = 1.4693e-01, PNorm = 66.1289, GNorm = 0.8909, lr_0 = 6.4978e-04
Loss = 1.4891e-01, PNorm = 66.1493, GNorm = 0.6609, lr_0 = 6.4934e-04
Loss = 1.4762e-01, PNorm = 66.1719, GNorm = 0.8038, lr_0 = 6.4889e-04
Loss = 1.4871e-01, PNorm = 66.1903, GNorm = 0.5676, lr_0 = 6.4845e-04
Loss = 1.7316e-01, PNorm = 66.2047, GNorm = 1.4634, lr_0 = 6.4800e-04
Loss = 1.6918e-01, PNorm = 66.2281, GNorm = 1.7868, lr_0 = 6.4756e-04
Loss = 1.7210e-01, PNorm = 66.2499, GNorm = 0.9472, lr_0 = 6.4712e-04
Loss = 1.8057e-01, PNorm = 66.2783, GNorm = 1.0096, lr_0 = 6.4667e-04
Loss = 1.4548e-01, PNorm = 66.3075, GNorm = 0.4404, lr_0 = 6.4623e-04
Loss = 1.4941e-01, PNorm = 66.3308, GNorm = 0.5989, lr_0 = 6.4579e-04
Loss = 1.5327e-01, PNorm = 66.3513, GNorm = 0.5972, lr_0 = 6.4534e-04
Loss = 1.6673e-01, PNorm = 66.3694, GNorm = 0.8195, lr_0 = 6.4490e-04
Loss = 1.5884e-01, PNorm = 66.3918, GNorm = 0.6616, lr_0 = 6.4446e-04
Loss = 1.4200e-01, PNorm = 66.4085, GNorm = 0.6893, lr_0 = 6.4402e-04
Loss = 1.7015e-01, PNorm = 66.4300, GNorm = 1.0583, lr_0 = 6.4358e-04
Loss = 1.5829e-01, PNorm = 66.4539, GNorm = 0.6613, lr_0 = 6.4314e-04
Loss = 1.4298e-01, PNorm = 66.4728, GNorm = 0.6229, lr_0 = 6.4270e-04
Loss = 1.6077e-01, PNorm = 66.4879, GNorm = 0.9843, lr_0 = 6.4226e-04
Loss = 1.7170e-01, PNorm = 66.5018, GNorm = 0.8468, lr_0 = 6.4182e-04
Loss = 1.6600e-01, PNorm = 66.5199, GNorm = 0.6987, lr_0 = 6.4138e-04
Loss = 1.4328e-01, PNorm = 66.5357, GNorm = 0.5255, lr_0 = 6.4094e-04
Loss = 2.0154e-01, PNorm = 66.5596, GNorm = 1.1099, lr_0 = 6.4050e-04
Loss = 1.5855e-01, PNorm = 66.5906, GNorm = 1.0068, lr_0 = 6.4006e-04
Loss = 1.7032e-01, PNorm = 66.6116, GNorm = 1.7398, lr_0 = 6.3962e-04
Loss = 1.6867e-01, PNorm = 66.6301, GNorm = 1.2689, lr_0 = 6.3918e-04
Loss = 1.8003e-01, PNorm = 66.6563, GNorm = 1.0236, lr_0 = 6.3874e-04
Loss = 1.7206e-01, PNorm = 66.6783, GNorm = 0.8245, lr_0 = 6.3831e-04
Loss = 1.7659e-01, PNorm = 66.6964, GNorm = 0.6847, lr_0 = 6.3787e-04
Loss = 1.7371e-01, PNorm = 66.7179, GNorm = 0.7040, lr_0 = 6.3743e-04
Loss = 1.6081e-01, PNorm = 66.7345, GNorm = 0.5943, lr_0 = 6.3700e-04
Loss = 1.6837e-01, PNorm = 66.7522, GNorm = 1.2335, lr_0 = 6.3656e-04
Loss = 1.7053e-01, PNorm = 66.7757, GNorm = 0.6964, lr_0 = 6.3612e-04
Loss = 1.6635e-01, PNorm = 66.7976, GNorm = 1.4117, lr_0 = 6.3569e-04
Loss = 1.6217e-01, PNorm = 66.8169, GNorm = 0.9460, lr_0 = 6.3525e-04
Loss = 1.6126e-01, PNorm = 66.8435, GNorm = 1.9152, lr_0 = 6.3482e-04
Loss = 1.7041e-01, PNorm = 66.8639, GNorm = 0.8391, lr_0 = 6.3438e-04
Loss = 1.7399e-01, PNorm = 66.8913, GNorm = 1.9914, lr_0 = 6.3395e-04
Loss = 1.6501e-01, PNorm = 66.9307, GNorm = 0.9870, lr_0 = 6.3351e-04
Loss = 1.5563e-01, PNorm = 66.9666, GNorm = 0.8982, lr_0 = 6.3308e-04
Loss = 1.6124e-01, PNorm = 66.9937, GNorm = 0.7431, lr_0 = 6.3265e-04
Loss = 1.5241e-01, PNorm = 67.0054, GNorm = 0.7114, lr_0 = 6.3221e-04
Loss = 1.5510e-01, PNorm = 67.0240, GNorm = 1.0564, lr_0 = 6.3178e-04
Loss = 1.6059e-01, PNorm = 67.0495, GNorm = 1.4450, lr_0 = 6.3135e-04
Loss = 1.5906e-01, PNorm = 67.0704, GNorm = 0.9368, lr_0 = 6.3091e-04
Loss = 1.4261e-01, PNorm = 67.0900, GNorm = 0.6067, lr_0 = 6.3048e-04
Loss = 1.6451e-01, PNorm = 67.1142, GNorm = 0.8550, lr_0 = 6.3005e-04
Loss = 1.4772e-01, PNorm = 67.1350, GNorm = 1.4719, lr_0 = 6.2962e-04
Loss = 1.5064e-01, PNorm = 67.1475, GNorm = 0.9689, lr_0 = 6.2919e-04
Loss = 1.7369e-01, PNorm = 67.1668, GNorm = 0.8443, lr_0 = 6.2876e-04
Loss = 1.6867e-01, PNorm = 67.1889, GNorm = 0.8695, lr_0 = 6.2833e-04
Loss = 1.5148e-01, PNorm = 67.2095, GNorm = 0.6004, lr_0 = 6.2789e-04
Loss = 1.5827e-01, PNorm = 67.2287, GNorm = 1.3452, lr_0 = 6.2746e-04
Loss = 1.6580e-01, PNorm = 67.2429, GNorm = 0.8097, lr_0 = 6.2703e-04
Loss = 1.5905e-01, PNorm = 67.2617, GNorm = 1.3345, lr_0 = 6.2661e-04
Loss = 1.4458e-01, PNorm = 67.2785, GNorm = 0.6624, lr_0 = 6.2618e-04
Loss = 1.5803e-01, PNorm = 67.3009, GNorm = 1.1155, lr_0 = 6.2575e-04
Loss = 1.8633e-01, PNorm = 67.3215, GNorm = 0.7484, lr_0 = 6.2532e-04
Loss = 1.5485e-01, PNorm = 67.3434, GNorm = 1.2612, lr_0 = 6.2489e-04
Loss = 1.4656e-01, PNorm = 67.3578, GNorm = 1.2046, lr_0 = 6.2446e-04
Loss = 1.5058e-01, PNorm = 67.3867, GNorm = 0.7971, lr_0 = 6.2403e-04
Loss = 1.5300e-01, PNorm = 67.4102, GNorm = 0.8781, lr_0 = 6.2361e-04
Loss = 1.6619e-01, PNorm = 67.4328, GNorm = 0.8101, lr_0 = 6.2318e-04
Loss = 1.4124e-01, PNorm = 67.4488, GNorm = 0.5929, lr_0 = 6.2275e-04
Loss = 1.6342e-01, PNorm = 67.4646, GNorm = 0.7713, lr_0 = 6.2233e-04
Loss = 1.6012e-01, PNorm = 67.4846, GNorm = 0.6874, lr_0 = 6.2190e-04
Loss = 1.5784e-01, PNorm = 67.5056, GNorm = 1.2229, lr_0 = 6.2147e-04
Loss = 1.5531e-01, PNorm = 67.5315, GNorm = 1.0271, lr_0 = 6.2105e-04
Loss = 1.6795e-01, PNorm = 67.5553, GNorm = 0.7982, lr_0 = 6.2062e-04
Loss = 1.7478e-01, PNorm = 67.5798, GNorm = 0.9681, lr_0 = 6.2020e-04
Loss = 1.5802e-01, PNorm = 67.6010, GNorm = 0.8095, lr_0 = 6.1977e-04
Loss = 1.5514e-01, PNorm = 67.6194, GNorm = 0.7699, lr_0 = 6.1935e-04
Loss = 1.5652e-01, PNorm = 67.6385, GNorm = 0.8736, lr_0 = 6.1892e-04
Loss = 1.5809e-01, PNorm = 67.6644, GNorm = 0.7279, lr_0 = 6.1850e-04
Loss = 1.5411e-01, PNorm = 67.6835, GNorm = 0.7159, lr_0 = 6.1808e-04
Loss = 1.6633e-01, PNorm = 67.7036, GNorm = 0.7721, lr_0 = 6.1765e-04
Loss = 1.6000e-01, PNorm = 67.7205, GNorm = 1.7053, lr_0 = 6.1723e-04
Loss = 1.7131e-01, PNorm = 67.7368, GNorm = 1.0401, lr_0 = 6.1681e-04
Loss = 1.5296e-01, PNorm = 67.7592, GNorm = 0.8107, lr_0 = 6.1638e-04
Loss = 1.6458e-01, PNorm = 67.7816, GNorm = 0.7750, lr_0 = 6.1596e-04
Loss = 1.4525e-01, PNorm = 67.7959, GNorm = 0.6303, lr_0 = 6.1554e-04
Loss = 1.6256e-01, PNorm = 67.8117, GNorm = 0.8161, lr_0 = 6.1512e-04
Loss = 1.4404e-01, PNorm = 67.8226, GNorm = 0.8973, lr_0 = 6.1470e-04
Loss = 1.4573e-01, PNorm = 67.8380, GNorm = 0.7003, lr_0 = 6.1428e-04
Loss = 1.4494e-01, PNorm = 67.8544, GNorm = 0.6530, lr_0 = 6.1385e-04
Loss = 1.5645e-01, PNorm = 67.8784, GNorm = 0.6421, lr_0 = 6.1343e-04
Loss = 1.4834e-01, PNorm = 67.9015, GNorm = 0.5714, lr_0 = 6.1301e-04
Loss = 1.5441e-01, PNorm = 67.9218, GNorm = 0.5881, lr_0 = 6.1259e-04
Loss = 1.5623e-01, PNorm = 67.9402, GNorm = 1.3031, lr_0 = 6.1217e-04
Loss = 1.4958e-01, PNorm = 67.9570, GNorm = 0.8025, lr_0 = 6.1175e-04
Loss = 1.3099e-01, PNorm = 67.9770, GNorm = 0.5574, lr_0 = 6.1134e-04
Loss = 1.5922e-01, PNorm = 67.9952, GNorm = 0.5436, lr_0 = 6.1092e-04
Loss = 1.6107e-01, PNorm = 68.0171, GNorm = 0.6092, lr_0 = 6.1050e-04
Validation mae = 0.244216
Epoch 8
Loss = 1.3816e-01, PNorm = 68.0361, GNorm = 0.5126, lr_0 = 6.1008e-04
Loss = 1.5833e-01, PNorm = 68.0486, GNorm = 0.9242, lr_0 = 6.0966e-04
Loss = 1.4845e-01, PNorm = 68.0661, GNorm = 0.5834, lr_0 = 6.0924e-04
Loss = 1.3483e-01, PNorm = 68.0830, GNorm = 0.7229, lr_0 = 6.0883e-04
Loss = 1.4380e-01, PNorm = 68.0975, GNorm = 0.5546, lr_0 = 6.0841e-04
Loss = 1.5418e-01, PNorm = 68.1166, GNorm = 0.8486, lr_0 = 6.0799e-04
Loss = 1.3786e-01, PNorm = 68.1346, GNorm = 0.5783, lr_0 = 6.0758e-04
Loss = 1.4281e-01, PNorm = 68.1589, GNorm = 0.7137, lr_0 = 6.0716e-04
Loss = 1.3731e-01, PNorm = 68.1758, GNorm = 1.0114, lr_0 = 6.0674e-04
Loss = 1.4046e-01, PNorm = 68.1895, GNorm = 0.6280, lr_0 = 6.0633e-04
Loss = 1.3346e-01, PNorm = 68.2108, GNorm = 0.6185, lr_0 = 6.0591e-04
Loss = 1.6854e-01, PNorm = 68.2353, GNorm = 0.7079, lr_0 = 6.0550e-04
Loss = 1.4368e-01, PNorm = 68.2538, GNorm = 0.7664, lr_0 = 6.0508e-04
Loss = 1.4375e-01, PNorm = 68.2726, GNorm = 0.5459, lr_0 = 6.0467e-04
Loss = 1.4793e-01, PNorm = 68.2915, GNorm = 0.6322, lr_0 = 6.0425e-04
Loss = 1.3503e-01, PNorm = 68.3053, GNorm = 0.5132, lr_0 = 6.0384e-04
Loss = 1.6175e-01, PNorm = 68.3240, GNorm = 1.4493, lr_0 = 6.0343e-04
Loss = 1.4415e-01, PNorm = 68.3479, GNorm = 0.6349, lr_0 = 6.0301e-04
Loss = 1.4562e-01, PNorm = 68.3658, GNorm = 0.6570, lr_0 = 6.0260e-04
Loss = 1.4645e-01, PNorm = 68.3793, GNorm = 0.6421, lr_0 = 6.0219e-04
Loss = 1.3206e-01, PNorm = 68.3924, GNorm = 0.8277, lr_0 = 6.0178e-04
Loss = 1.5710e-01, PNorm = 68.4088, GNorm = 0.7335, lr_0 = 6.0136e-04
Loss = 1.4409e-01, PNorm = 68.4297, GNorm = 1.1580, lr_0 = 6.0095e-04
Loss = 1.3487e-01, PNorm = 68.4539, GNorm = 0.5776, lr_0 = 6.0054e-04
Loss = 1.3918e-01, PNorm = 68.4773, GNorm = 1.2971, lr_0 = 6.0013e-04
Loss = 1.5162e-01, PNorm = 68.4982, GNorm = 0.7626, lr_0 = 5.9972e-04
Loss = 1.4998e-01, PNorm = 68.5248, GNorm = 0.6983, lr_0 = 5.9931e-04
Loss = 1.3013e-01, PNorm = 68.5485, GNorm = 0.6391, lr_0 = 5.9890e-04
Loss = 1.3188e-01, PNorm = 68.5685, GNorm = 0.6670, lr_0 = 5.9849e-04
Loss = 1.5664e-01, PNorm = 68.5822, GNorm = 0.9131, lr_0 = 5.9808e-04
Loss = 1.4029e-01, PNorm = 68.5972, GNorm = 1.2549, lr_0 = 5.9767e-04
Loss = 1.4175e-01, PNorm = 68.6128, GNorm = 0.7720, lr_0 = 5.9726e-04
Loss = 1.5705e-01, PNorm = 68.6346, GNorm = 0.8669, lr_0 = 5.9685e-04
Loss = 1.6061e-01, PNorm = 68.6583, GNorm = 0.9859, lr_0 = 5.9644e-04
Loss = 1.5529e-01, PNorm = 68.6855, GNorm = 0.5849, lr_0 = 5.9603e-04
Loss = 1.4461e-01, PNorm = 68.7099, GNorm = 0.9460, lr_0 = 5.9562e-04
Loss = 1.4334e-01, PNorm = 68.7235, GNorm = 0.6285, lr_0 = 5.9521e-04
Loss = 1.5718e-01, PNorm = 68.7412, GNorm = 0.7027, lr_0 = 5.9481e-04
Loss = 1.7888e-01, PNorm = 68.7605, GNorm = 0.6555, lr_0 = 5.9440e-04
Loss = 1.5209e-01, PNorm = 68.7841, GNorm = 0.6385, lr_0 = 5.9399e-04
Loss = 1.6189e-01, PNorm = 68.8029, GNorm = 0.7683, lr_0 = 5.9358e-04
Loss = 1.3812e-01, PNorm = 68.8182, GNorm = 0.8881, lr_0 = 5.9318e-04
Loss = 1.4589e-01, PNorm = 68.8345, GNorm = 0.6212, lr_0 = 5.9277e-04
Loss = 1.4968e-01, PNorm = 68.8578, GNorm = 1.3572, lr_0 = 5.9236e-04
Loss = 1.3229e-01, PNorm = 68.8868, GNorm = 0.9969, lr_0 = 5.9196e-04
Loss = 1.4799e-01, PNorm = 68.9089, GNorm = 0.6865, lr_0 = 5.9155e-04
Loss = 1.5516e-01, PNorm = 68.9341, GNorm = 0.8915, lr_0 = 5.9115e-04
Loss = 1.3997e-01, PNorm = 68.9514, GNorm = 0.6470, lr_0 = 5.9074e-04
Loss = 1.5123e-01, PNorm = 68.9683, GNorm = 0.7556, lr_0 = 5.9034e-04
Loss = 1.4167e-01, PNorm = 68.9793, GNorm = 0.7216, lr_0 = 5.8993e-04
Loss = 1.3562e-01, PNorm = 69.0001, GNorm = 1.1260, lr_0 = 5.8953e-04
Loss = 1.5983e-01, PNorm = 69.0185, GNorm = 1.4015, lr_0 = 5.8913e-04
Loss = 1.6099e-01, PNorm = 69.0348, GNorm = 0.5943, lr_0 = 5.8872e-04
Loss = 1.4056e-01, PNorm = 69.0418, GNorm = 0.7039, lr_0 = 5.8832e-04
Loss = 1.4957e-01, PNorm = 69.0541, GNorm = 0.6247, lr_0 = 5.8792e-04
Loss = 1.4799e-01, PNorm = 69.0702, GNorm = 0.5476, lr_0 = 5.8751e-04
Loss = 1.3714e-01, PNorm = 69.0896, GNorm = 0.7692, lr_0 = 5.8711e-04
Loss = 1.5366e-01, PNorm = 69.1050, GNorm = 0.7883, lr_0 = 5.8671e-04
Loss = 1.4122e-01, PNorm = 69.1233, GNorm = 0.6664, lr_0 = 5.8631e-04
Loss = 1.3423e-01, PNorm = 69.1374, GNorm = 0.7243, lr_0 = 5.8591e-04
Loss = 1.4924e-01, PNorm = 69.1500, GNorm = 0.7941, lr_0 = 5.8550e-04
Loss = 1.4053e-01, PNorm = 69.1657, GNorm = 0.8215, lr_0 = 5.8510e-04
Loss = 1.4282e-01, PNorm = 69.1831, GNorm = 0.6971, lr_0 = 5.8470e-04
Loss = 1.5628e-01, PNorm = 69.1994, GNorm = 0.6902, lr_0 = 5.8430e-04
Loss = 1.4137e-01, PNorm = 69.2215, GNorm = 0.8559, lr_0 = 5.8390e-04
Loss = 1.5407e-01, PNorm = 69.2382, GNorm = 0.7257, lr_0 = 5.8350e-04
Loss = 1.3463e-01, PNorm = 69.2570, GNorm = 0.4031, lr_0 = 5.8310e-04
Loss = 1.3123e-01, PNorm = 69.2665, GNorm = 0.9509, lr_0 = 5.8270e-04
Loss = 1.6416e-01, PNorm = 69.2793, GNorm = 0.6190, lr_0 = 5.8230e-04
Loss = 1.6081e-01, PNorm = 69.2998, GNorm = 0.6260, lr_0 = 5.8190e-04
Loss = 1.3866e-01, PNorm = 69.3169, GNorm = 0.5549, lr_0 = 5.8151e-04
Loss = 1.4693e-01, PNorm = 69.3382, GNorm = 0.5857, lr_0 = 5.8111e-04
Loss = 1.5652e-01, PNorm = 69.3560, GNorm = 0.7872, lr_0 = 5.8071e-04
Loss = 1.2736e-01, PNorm = 69.3743, GNorm = 0.9893, lr_0 = 5.8031e-04
Loss = 1.4800e-01, PNorm = 69.3840, GNorm = 0.8188, lr_0 = 5.7991e-04
Loss = 1.5948e-01, PNorm = 69.4047, GNorm = 0.5847, lr_0 = 5.7952e-04
Loss = 1.6457e-01, PNorm = 69.4268, GNorm = 1.2926, lr_0 = 5.7912e-04
Loss = 1.4234e-01, PNorm = 69.4490, GNorm = 1.1832, lr_0 = 5.7872e-04
Loss = 1.6062e-01, PNorm = 69.4617, GNorm = 1.1612, lr_0 = 5.7833e-04
Loss = 1.5152e-01, PNorm = 69.4765, GNorm = 0.9890, lr_0 = 5.7793e-04
Loss = 1.3850e-01, PNorm = 69.4860, GNorm = 0.5648, lr_0 = 5.7753e-04
Loss = 1.4655e-01, PNorm = 69.5091, GNorm = 2.1422, lr_0 = 5.7714e-04
Loss = 1.4734e-01, PNorm = 69.5266, GNorm = 0.9065, lr_0 = 5.7674e-04
Loss = 1.6788e-01, PNorm = 69.5483, GNorm = 1.0509, lr_0 = 5.7635e-04
Loss = 1.5296e-01, PNorm = 69.5630, GNorm = 0.7705, lr_0 = 5.7595e-04
Loss = 1.3628e-01, PNorm = 69.5856, GNorm = 1.0764, lr_0 = 5.7556e-04
Loss = 1.5704e-01, PNorm = 69.6050, GNorm = 0.4711, lr_0 = 5.7516e-04
Loss = 1.5178e-01, PNorm = 69.6204, GNorm = 0.6166, lr_0 = 5.7477e-04
Loss = 1.5439e-01, PNorm = 69.6347, GNorm = 0.8598, lr_0 = 5.7438e-04
Loss = 1.3520e-01, PNorm = 69.6500, GNorm = 0.9779, lr_0 = 5.7398e-04
Loss = 1.4139e-01, PNorm = 69.6648, GNorm = 0.5455, lr_0 = 5.7359e-04
Loss = 1.4005e-01, PNorm = 69.6810, GNorm = 1.0012, lr_0 = 5.7320e-04
Loss = 1.3912e-01, PNorm = 69.7004, GNorm = 0.8318, lr_0 = 5.7280e-04
Loss = 1.4590e-01, PNorm = 69.7189, GNorm = 1.0199, lr_0 = 5.7241e-04
Loss = 1.5062e-01, PNorm = 69.7357, GNorm = 0.9452, lr_0 = 5.7202e-04
Loss = 1.4960e-01, PNorm = 69.7561, GNorm = 0.6747, lr_0 = 5.7163e-04
Loss = 1.6737e-01, PNorm = 69.7747, GNorm = 0.7620, lr_0 = 5.7124e-04
Loss = 1.6005e-01, PNorm = 69.7910, GNorm = 0.5202, lr_0 = 5.7084e-04
Loss = 1.7272e-01, PNorm = 69.8191, GNorm = 1.4752, lr_0 = 5.7045e-04
Loss = 1.4053e-01, PNorm = 69.8433, GNorm = 1.3073, lr_0 = 5.7006e-04
Loss = 1.5053e-01, PNorm = 69.8611, GNorm = 0.6687, lr_0 = 5.6967e-04
Loss = 1.4321e-01, PNorm = 69.8790, GNorm = 0.6835, lr_0 = 5.6928e-04
Loss = 1.6100e-01, PNorm = 69.8999, GNorm = 0.6269, lr_0 = 5.6889e-04
Loss = 1.7475e-01, PNorm = 69.9268, GNorm = 0.8303, lr_0 = 5.6850e-04
Loss = 1.3573e-01, PNorm = 69.9418, GNorm = 0.6333, lr_0 = 5.6811e-04
Loss = 1.5937e-01, PNorm = 69.9519, GNorm = 1.0933, lr_0 = 5.6772e-04
Loss = 1.3187e-01, PNorm = 69.9624, GNorm = 0.9262, lr_0 = 5.6733e-04
Loss = 1.5191e-01, PNorm = 69.9816, GNorm = 0.5762, lr_0 = 5.6695e-04
Loss = 1.5412e-01, PNorm = 70.0014, GNorm = 0.8521, lr_0 = 5.6656e-04
Loss = 1.5838e-01, PNorm = 70.0186, GNorm = 1.5696, lr_0 = 5.6617e-04
Loss = 1.4730e-01, PNorm = 70.0327, GNorm = 0.7564, lr_0 = 5.6578e-04
Loss = 1.5841e-01, PNorm = 70.0483, GNorm = 0.7495, lr_0 = 5.6539e-04
Loss = 1.4813e-01, PNorm = 70.0590, GNorm = 0.5812, lr_0 = 5.6501e-04
Loss = 1.3125e-01, PNorm = 70.0730, GNorm = 0.5118, lr_0 = 5.6462e-04
Loss = 1.5367e-01, PNorm = 70.0913, GNorm = 0.6218, lr_0 = 5.6423e-04
Loss = 1.5594e-01, PNorm = 70.1068, GNorm = 1.6004, lr_0 = 5.6385e-04
Loss = 1.5504e-01, PNorm = 70.1280, GNorm = 1.2214, lr_0 = 5.6346e-04
Loss = 1.4769e-01, PNorm = 70.1533, GNorm = 0.7350, lr_0 = 5.6307e-04
Loss = 1.4699e-01, PNorm = 70.1741, GNorm = 0.5134, lr_0 = 5.6269e-04
Loss = 1.5259e-01, PNorm = 70.1917, GNorm = 0.8502, lr_0 = 5.6230e-04
Validation mae = 0.238790
Epoch 9
Loss = 1.3280e-01, PNorm = 70.2067, GNorm = 0.6605, lr_0 = 5.6192e-04
Loss = 1.4055e-01, PNorm = 70.2307, GNorm = 0.5945, lr_0 = 5.6153e-04
Loss = 1.3737e-01, PNorm = 70.2535, GNorm = 0.8002, lr_0 = 5.6115e-04
Loss = 1.4298e-01, PNorm = 70.2741, GNorm = 0.6479, lr_0 = 5.6076e-04
Loss = 1.4202e-01, PNorm = 70.2959, GNorm = 0.9622, lr_0 = 5.6038e-04
Loss = 1.2723e-01, PNorm = 70.3146, GNorm = 0.5844, lr_0 = 5.6000e-04
Loss = 1.2701e-01, PNorm = 70.3343, GNorm = 0.5973, lr_0 = 5.5961e-04
Loss = 1.3538e-01, PNorm = 70.3518, GNorm = 0.8585, lr_0 = 5.5923e-04
Loss = 1.3166e-01, PNorm = 70.3686, GNorm = 0.8408, lr_0 = 5.5885e-04
Loss = 1.3814e-01, PNorm = 70.3906, GNorm = 0.7824, lr_0 = 5.5846e-04
Loss = 1.5483e-01, PNorm = 70.4072, GNorm = 0.9236, lr_0 = 5.5808e-04
Loss = 1.3811e-01, PNorm = 70.4182, GNorm = 1.0272, lr_0 = 5.5770e-04
Loss = 1.3729e-01, PNorm = 70.4345, GNorm = 0.5973, lr_0 = 5.5732e-04
Loss = 1.4281e-01, PNorm = 70.4572, GNorm = 1.1531, lr_0 = 5.5693e-04
Loss = 1.4122e-01, PNorm = 70.4859, GNorm = 0.8796, lr_0 = 5.5655e-04
Loss = 1.3997e-01, PNorm = 70.5095, GNorm = 0.9778, lr_0 = 5.5617e-04
Loss = 1.4894e-01, PNorm = 70.5262, GNorm = 0.6473, lr_0 = 5.5579e-04
Loss = 1.4612e-01, PNorm = 70.5434, GNorm = 0.5817, lr_0 = 5.5541e-04
Loss = 1.2636e-01, PNorm = 70.5589, GNorm = 1.2972, lr_0 = 5.5503e-04
Loss = 1.4121e-01, PNorm = 70.5787, GNorm = 0.5408, lr_0 = 5.5465e-04
Loss = 1.3146e-01, PNorm = 70.5973, GNorm = 1.1423, lr_0 = 5.5427e-04
Loss = 1.3798e-01, PNorm = 70.6117, GNorm = 0.8286, lr_0 = 5.5389e-04
Loss = 1.3019e-01, PNorm = 70.6234, GNorm = 0.9908, lr_0 = 5.5351e-04
Loss = 1.4709e-01, PNorm = 70.6345, GNorm = 0.4882, lr_0 = 5.5313e-04
Loss = 1.2787e-01, PNorm = 70.6494, GNorm = 0.7083, lr_0 = 5.5275e-04
Loss = 1.4460e-01, PNorm = 70.6687, GNorm = 1.4611, lr_0 = 5.5237e-04
Loss = 1.3081e-01, PNorm = 70.6868, GNorm = 0.8921, lr_0 = 5.5199e-04
Loss = 1.3295e-01, PNorm = 70.7006, GNorm = 0.8420, lr_0 = 5.5162e-04
Loss = 1.4258e-01, PNorm = 70.7126, GNorm = 0.8725, lr_0 = 5.5124e-04
Loss = 1.3070e-01, PNorm = 70.7277, GNorm = 1.1649, lr_0 = 5.5086e-04
Loss = 1.3917e-01, PNorm = 70.7511, GNorm = 0.4840, lr_0 = 5.5048e-04
Loss = 1.6009e-01, PNorm = 70.7723, GNorm = 0.8457, lr_0 = 5.5011e-04
Loss = 1.5903e-01, PNorm = 70.7920, GNorm = 1.6470, lr_0 = 5.4973e-04
Loss = 1.4833e-01, PNorm = 70.8154, GNorm = 1.2501, lr_0 = 5.4935e-04
Loss = 1.3660e-01, PNorm = 70.8321, GNorm = 0.6430, lr_0 = 5.4898e-04
Loss = 1.3416e-01, PNorm = 70.8503, GNorm = 0.7211, lr_0 = 5.4860e-04
Loss = 1.3193e-01, PNorm = 70.8692, GNorm = 1.2132, lr_0 = 5.4822e-04
Loss = 1.5576e-01, PNorm = 70.8789, GNorm = 1.7073, lr_0 = 5.4785e-04
Loss = 1.4774e-01, PNorm = 70.8951, GNorm = 0.6713, lr_0 = 5.4747e-04
Loss = 1.4071e-01, PNorm = 70.9163, GNorm = 1.0505, lr_0 = 5.4710e-04
Loss = 1.3376e-01, PNorm = 70.9328, GNorm = 0.7460, lr_0 = 5.4672e-04
Loss = 1.3261e-01, PNorm = 70.9507, GNorm = 0.7227, lr_0 = 5.4635e-04
Loss = 1.4925e-01, PNorm = 70.9601, GNorm = 0.8437, lr_0 = 5.4597e-04
Loss = 1.5892e-01, PNorm = 70.9744, GNorm = 1.0298, lr_0 = 5.4560e-04
Loss = 1.3129e-01, PNorm = 70.9908, GNorm = 0.6049, lr_0 = 5.4523e-04
Loss = 1.4298e-01, PNorm = 71.0077, GNorm = 0.9841, lr_0 = 5.4485e-04
Loss = 1.5662e-01, PNorm = 71.0248, GNorm = 0.6997, lr_0 = 5.4448e-04
Loss = 1.2849e-01, PNorm = 71.0415, GNorm = 0.5911, lr_0 = 5.4411e-04
Loss = 1.3705e-01, PNorm = 71.0566, GNorm = 0.7605, lr_0 = 5.4373e-04
Loss = 1.2738e-01, PNorm = 71.0697, GNorm = 0.5781, lr_0 = 5.4336e-04
Loss = 1.3667e-01, PNorm = 71.0824, GNorm = 0.8250, lr_0 = 5.4299e-04
Loss = 1.3345e-01, PNorm = 71.0976, GNorm = 0.8865, lr_0 = 5.4262e-04
Loss = 1.5056e-01, PNorm = 71.1084, GNorm = 0.7185, lr_0 = 5.4225e-04
Loss = 1.4601e-01, PNorm = 71.1254, GNorm = 1.0421, lr_0 = 5.4187e-04
Loss = 1.5335e-01, PNorm = 71.1427, GNorm = 1.2502, lr_0 = 5.4150e-04
Loss = 1.2553e-01, PNorm = 71.1594, GNorm = 1.0601, lr_0 = 5.4113e-04
Loss = 1.4423e-01, PNorm = 71.1748, GNorm = 0.6780, lr_0 = 5.4076e-04
Loss = 1.4952e-01, PNorm = 71.1923, GNorm = 1.0110, lr_0 = 5.4039e-04
Loss = 1.4263e-01, PNorm = 71.2107, GNorm = 0.6488, lr_0 = 5.4002e-04
Loss = 1.2549e-01, PNorm = 71.2255, GNorm = 0.7070, lr_0 = 5.3965e-04
Loss = 1.1283e-01, PNorm = 71.2376, GNorm = 0.5518, lr_0 = 5.3928e-04
Loss = 1.3988e-01, PNorm = 71.2495, GNorm = 0.8417, lr_0 = 5.3891e-04
Loss = 1.2326e-01, PNorm = 71.2645, GNorm = 0.8981, lr_0 = 5.3854e-04
Loss = 1.4252e-01, PNorm = 71.2825, GNorm = 0.5923, lr_0 = 5.3817e-04
Loss = 1.4274e-01, PNorm = 71.3006, GNorm = 0.5895, lr_0 = 5.3781e-04
Loss = 1.3789e-01, PNorm = 71.3157, GNorm = 0.6734, lr_0 = 5.3744e-04
Loss = 1.6384e-01, PNorm = 71.3296, GNorm = 1.5081, lr_0 = 5.3707e-04
Loss = 1.5842e-01, PNorm = 71.3526, GNorm = 0.6566, lr_0 = 5.3670e-04
Loss = 1.5672e-01, PNorm = 71.3701, GNorm = 0.8119, lr_0 = 5.3633e-04
Loss = 1.3199e-01, PNorm = 71.3865, GNorm = 1.1534, lr_0 = 5.3597e-04
Loss = 1.2795e-01, PNorm = 71.4037, GNorm = 0.8635, lr_0 = 5.3560e-04
Loss = 1.5134e-01, PNorm = 71.4126, GNorm = 0.5558, lr_0 = 5.3523e-04
Loss = 1.3954e-01, PNorm = 71.4292, GNorm = 0.7340, lr_0 = 5.3486e-04
Loss = 1.2524e-01, PNorm = 71.4477, GNorm = 0.5517, lr_0 = 5.3450e-04
Loss = 1.4798e-01, PNorm = 71.4566, GNorm = 0.8922, lr_0 = 5.3413e-04
Loss = 1.4547e-01, PNorm = 71.4690, GNorm = 1.0023, lr_0 = 5.3377e-04
Loss = 1.2425e-01, PNorm = 71.4839, GNorm = 0.5492, lr_0 = 5.3340e-04
Loss = 1.5300e-01, PNorm = 71.5040, GNorm = 0.6493, lr_0 = 5.3304e-04
Loss = 1.3614e-01, PNorm = 71.5255, GNorm = 1.0272, lr_0 = 5.3267e-04
Loss = 1.3645e-01, PNorm = 71.5449, GNorm = 0.7899, lr_0 = 5.3231e-04
Loss = 1.2741e-01, PNorm = 71.5590, GNorm = 0.6020, lr_0 = 5.3194e-04
Loss = 1.2827e-01, PNorm = 71.5690, GNorm = 0.6147, lr_0 = 5.3158e-04
Loss = 1.3104e-01, PNorm = 71.5847, GNorm = 0.6922, lr_0 = 5.3121e-04
Loss = 1.2209e-01, PNorm = 71.5993, GNorm = 0.8134, lr_0 = 5.3085e-04
Loss = 1.6945e-01, PNorm = 71.6149, GNorm = 1.1456, lr_0 = 5.3048e-04
Loss = 1.4302e-01, PNorm = 71.6331, GNorm = 0.7196, lr_0 = 5.3012e-04
Loss = 1.4072e-01, PNorm = 71.6464, GNorm = 0.8639, lr_0 = 5.2976e-04
Loss = 1.5220e-01, PNorm = 71.6625, GNorm = 0.6312, lr_0 = 5.2939e-04
Loss = 1.3723e-01, PNorm = 71.6796, GNorm = 0.6281, lr_0 = 5.2903e-04
Loss = 1.5239e-01, PNorm = 71.6954, GNorm = 0.7294, lr_0 = 5.2867e-04
Loss = 1.3227e-01, PNorm = 71.7085, GNorm = 1.1013, lr_0 = 5.2831e-04
Loss = 1.4751e-01, PNorm = 71.7211, GNorm = 0.6022, lr_0 = 5.2795e-04
Loss = 1.4503e-01, PNorm = 71.7323, GNorm = 1.2916, lr_0 = 5.2758e-04
Loss = 1.5000e-01, PNorm = 71.7412, GNorm = 0.7661, lr_0 = 5.2722e-04
Loss = 1.6413e-01, PNorm = 71.7592, GNorm = 1.2502, lr_0 = 5.2686e-04
Loss = 1.4018e-01, PNorm = 71.7776, GNorm = 0.8404, lr_0 = 5.2650e-04
Loss = 1.4765e-01, PNorm = 71.7943, GNorm = 0.7435, lr_0 = 5.2614e-04
Loss = 1.3283e-01, PNorm = 71.8102, GNorm = 0.5943, lr_0 = 5.2578e-04
Loss = 1.3541e-01, PNorm = 71.8316, GNorm = 0.8032, lr_0 = 5.2542e-04
Loss = 1.4737e-01, PNorm = 71.8430, GNorm = 0.6898, lr_0 = 5.2506e-04
Loss = 1.4895e-01, PNorm = 71.8627, GNorm = 1.0256, lr_0 = 5.2470e-04
Loss = 1.5421e-01, PNorm = 71.8786, GNorm = 0.8560, lr_0 = 5.2434e-04
Loss = 1.5357e-01, PNorm = 71.9003, GNorm = 1.1927, lr_0 = 5.2398e-04
Loss = 1.5632e-01, PNorm = 71.9104, GNorm = 1.1082, lr_0 = 5.2362e-04
Loss = 1.4420e-01, PNorm = 71.9221, GNorm = 0.7511, lr_0 = 5.2326e-04
Loss = 1.5735e-01, PNorm = 71.9338, GNorm = 0.9148, lr_0 = 5.2290e-04
Loss = 1.3459e-01, PNorm = 71.9480, GNorm = 0.5011, lr_0 = 5.2255e-04
Loss = 1.3278e-01, PNorm = 71.9666, GNorm = 0.6996, lr_0 = 5.2219e-04
Loss = 1.4571e-01, PNorm = 71.9825, GNorm = 0.6803, lr_0 = 5.2183e-04
Loss = 1.4259e-01, PNorm = 71.9971, GNorm = 1.1197, lr_0 = 5.2147e-04
Loss = 1.2439e-01, PNorm = 72.0134, GNorm = 0.8555, lr_0 = 5.2112e-04
Loss = 1.4079e-01, PNorm = 72.0282, GNorm = 0.8853, lr_0 = 5.2076e-04
Loss = 1.2486e-01, PNorm = 72.0402, GNorm = 0.7537, lr_0 = 5.2040e-04
Loss = 1.3744e-01, PNorm = 72.0568, GNorm = 0.9386, lr_0 = 5.2005e-04
Loss = 1.4455e-01, PNorm = 72.0692, GNorm = 1.0429, lr_0 = 5.1969e-04
Loss = 1.6841e-01, PNorm = 72.0899, GNorm = 0.6754, lr_0 = 5.1933e-04
Loss = 1.3872e-01, PNorm = 72.1077, GNorm = 0.9511, lr_0 = 5.1898e-04
Loss = 1.3793e-01, PNorm = 72.1236, GNorm = 0.7074, lr_0 = 5.1862e-04
Loss = 1.2537e-01, PNorm = 72.1356, GNorm = 0.6744, lr_0 = 5.1827e-04
Loss = 1.2673e-01, PNorm = 72.1472, GNorm = 0.7748, lr_0 = 5.1791e-04
Validation mae = 0.239282
Epoch 10
Loss = 1.4720e-01, PNorm = 72.1595, GNorm = 1.2738, lr_0 = 5.1756e-04
Loss = 1.3249e-01, PNorm = 72.1748, GNorm = 0.4490, lr_0 = 5.1720e-04
Loss = 1.3457e-01, PNorm = 72.1953, GNorm = 1.0674, lr_0 = 5.1685e-04
Loss = 1.5634e-01, PNorm = 72.2155, GNorm = 0.7187, lr_0 = 5.1649e-04
Loss = 1.5171e-01, PNorm = 72.2374, GNorm = 1.1631, lr_0 = 5.1614e-04
Loss = 1.2930e-01, PNorm = 72.2521, GNorm = 0.6625, lr_0 = 5.1579e-04
Loss = 1.3535e-01, PNorm = 72.2690, GNorm = 0.7086, lr_0 = 5.1543e-04
Loss = 1.4064e-01, PNorm = 72.2820, GNorm = 0.6706, lr_0 = 5.1508e-04
Loss = 1.4120e-01, PNorm = 72.2940, GNorm = 1.2160, lr_0 = 5.1473e-04
Loss = 1.3366e-01, PNorm = 72.3061, GNorm = 0.9500, lr_0 = 5.1437e-04
Loss = 1.3591e-01, PNorm = 72.3233, GNorm = 0.5664, lr_0 = 5.1402e-04
Loss = 1.2829e-01, PNorm = 72.3409, GNorm = 0.9210, lr_0 = 5.1367e-04
Loss = 1.3537e-01, PNorm = 72.3580, GNorm = 0.8747, lr_0 = 5.1332e-04
Loss = 1.3680e-01, PNorm = 72.3757, GNorm = 0.5616, lr_0 = 5.1297e-04
Loss = 1.3584e-01, PNorm = 72.3861, GNorm = 0.6156, lr_0 = 5.1262e-04
Loss = 1.3057e-01, PNorm = 72.4020, GNorm = 0.7066, lr_0 = 5.1226e-04
Loss = 1.4087e-01, PNorm = 72.4201, GNorm = 0.6919, lr_0 = 5.1191e-04
Loss = 1.3723e-01, PNorm = 72.4386, GNorm = 0.8163, lr_0 = 5.1156e-04
Loss = 1.2235e-01, PNorm = 72.4522, GNorm = 0.5693, lr_0 = 5.1121e-04
Loss = 1.3535e-01, PNorm = 72.4640, GNorm = 0.8799, lr_0 = 5.1086e-04
Loss = 1.1980e-01, PNorm = 72.4837, GNorm = 0.7168, lr_0 = 5.1051e-04
Loss = 1.3635e-01, PNorm = 72.4860, GNorm = 0.6405, lr_0 = 5.1016e-04
Loss = 1.2646e-01, PNorm = 72.4970, GNorm = 0.7300, lr_0 = 5.0981e-04
Loss = 1.3148e-01, PNorm = 72.5071, GNorm = 1.0784, lr_0 = 5.0946e-04
Loss = 1.3030e-01, PNorm = 72.5237, GNorm = 0.7815, lr_0 = 5.0911e-04
Loss = 1.3390e-01, PNorm = 72.5393, GNorm = 0.7089, lr_0 = 5.0877e-04
Loss = 1.3649e-01, PNorm = 72.5534, GNorm = 0.4624, lr_0 = 5.0842e-04
Loss = 1.3293e-01, PNorm = 72.5717, GNorm = 0.8580, lr_0 = 5.0807e-04
Loss = 1.4886e-01, PNorm = 72.5867, GNorm = 0.6474, lr_0 = 5.0772e-04
Loss = 1.3723e-01, PNorm = 72.6034, GNorm = 0.6075, lr_0 = 5.0737e-04
Loss = 1.3490e-01, PNorm = 72.6195, GNorm = 0.7924, lr_0 = 5.0703e-04
Loss = 1.3807e-01, PNorm = 72.6369, GNorm = 0.8460, lr_0 = 5.0668e-04
Loss = 1.4415e-01, PNorm = 72.6569, GNorm = 0.7854, lr_0 = 5.0633e-04
Loss = 1.2489e-01, PNorm = 72.6766, GNorm = 0.6282, lr_0 = 5.0598e-04
Loss = 1.2510e-01, PNorm = 72.6911, GNorm = 0.6784, lr_0 = 5.0564e-04
Loss = 1.4005e-01, PNorm = 72.7030, GNorm = 0.8263, lr_0 = 5.0529e-04
Loss = 1.4788e-01, PNorm = 72.7169, GNorm = 0.5550, lr_0 = 5.0494e-04
Loss = 1.4785e-01, PNorm = 72.7326, GNorm = 0.5303, lr_0 = 5.0460e-04
Loss = 1.4207e-01, PNorm = 72.7561, GNorm = 1.3614, lr_0 = 5.0425e-04
Loss = 1.6031e-01, PNorm = 72.7801, GNorm = 1.0726, lr_0 = 5.0391e-04
Loss = 1.2995e-01, PNorm = 72.7997, GNorm = 0.5470, lr_0 = 5.0356e-04
Loss = 1.2828e-01, PNorm = 72.8113, GNorm = 0.8820, lr_0 = 5.0322e-04
Loss = 1.1579e-01, PNorm = 72.8220, GNorm = 0.6403, lr_0 = 5.0287e-04
Loss = 1.3164e-01, PNorm = 72.8381, GNorm = 1.3186, lr_0 = 5.0253e-04
Loss = 1.5162e-01, PNorm = 72.8535, GNorm = 0.7056, lr_0 = 5.0218e-04
Loss = 1.2571e-01, PNorm = 72.8742, GNorm = 0.8995, lr_0 = 5.0184e-04
Loss = 1.3969e-01, PNorm = 72.8943, GNorm = 0.6600, lr_0 = 5.0150e-04
Loss = 1.1962e-01, PNorm = 72.9081, GNorm = 0.5520, lr_0 = 5.0115e-04
Loss = 1.4175e-01, PNorm = 72.9241, GNorm = 0.6431, lr_0 = 5.0081e-04
Loss = 1.2779e-01, PNorm = 72.9390, GNorm = 0.7518, lr_0 = 5.0047e-04
Loss = 1.2630e-01, PNorm = 72.9605, GNorm = 0.6147, lr_0 = 5.0012e-04
Loss = 1.2578e-01, PNorm = 72.9725, GNorm = 0.9849, lr_0 = 4.9978e-04
Loss = 1.2491e-01, PNorm = 72.9862, GNorm = 0.9939, lr_0 = 4.9944e-04
Loss = 1.3819e-01, PNorm = 73.0001, GNorm = 0.8439, lr_0 = 4.9910e-04
Loss = 1.1972e-01, PNorm = 73.0140, GNorm = 0.4700, lr_0 = 4.9875e-04
Loss = 1.3506e-01, PNorm = 73.0284, GNorm = 0.6812, lr_0 = 4.9841e-04
Loss = 1.3305e-01, PNorm = 73.0437, GNorm = 0.6617, lr_0 = 4.9807e-04
Loss = 1.3384e-01, PNorm = 73.0554, GNorm = 0.8057, lr_0 = 4.9773e-04
Loss = 1.4727e-01, PNorm = 73.0674, GNorm = 0.5762, lr_0 = 4.9739e-04
Loss = 1.3490e-01, PNorm = 73.0809, GNorm = 1.1203, lr_0 = 4.9705e-04
Loss = 1.2541e-01, PNorm = 73.0952, GNorm = 0.8558, lr_0 = 4.9671e-04
Loss = 1.1712e-01, PNorm = 73.1094, GNorm = 0.8311, lr_0 = 4.9637e-04
Loss = 1.4677e-01, PNorm = 73.1222, GNorm = 0.5582, lr_0 = 4.9603e-04
Loss = 1.3918e-01, PNorm = 73.1365, GNorm = 0.7619, lr_0 = 4.9569e-04
Loss = 1.4062e-01, PNorm = 73.1472, GNorm = 0.7842, lr_0 = 4.9535e-04
Loss = 1.2827e-01, PNorm = 73.1596, GNorm = 0.6935, lr_0 = 4.9501e-04
Loss = 1.4736e-01, PNorm = 73.1733, GNorm = 0.6736, lr_0 = 4.9467e-04
Loss = 1.7349e-01, PNorm = 73.1970, GNorm = 0.7704, lr_0 = 4.9433e-04
Loss = 1.3124e-01, PNorm = 73.2197, GNorm = 0.8086, lr_0 = 4.9399e-04
Loss = 1.2993e-01, PNorm = 73.2411, GNorm = 0.7155, lr_0 = 4.9365e-04
Loss = 1.3633e-01, PNorm = 73.2605, GNorm = 0.6813, lr_0 = 4.9332e-04
Loss = 1.3201e-01, PNorm = 73.2704, GNorm = 0.6725, lr_0 = 4.9298e-04
Loss = 1.2757e-01, PNorm = 73.2814, GNorm = 1.2023, lr_0 = 4.9264e-04
Loss = 1.3982e-01, PNorm = 73.2907, GNorm = 0.7504, lr_0 = 4.9230e-04
Loss = 1.2581e-01, PNorm = 73.3055, GNorm = 0.7179, lr_0 = 4.9197e-04
Loss = 1.4458e-01, PNorm = 73.3227, GNorm = 0.9897, lr_0 = 4.9163e-04
Loss = 1.3650e-01, PNorm = 73.3432, GNorm = 0.9885, lr_0 = 4.9129e-04
Loss = 1.4613e-01, PNorm = 73.3600, GNorm = 0.6683, lr_0 = 4.9095e-04
Loss = 1.1865e-01, PNorm = 73.3727, GNorm = 0.7786, lr_0 = 4.9062e-04
Loss = 1.2576e-01, PNorm = 73.3836, GNorm = 0.8530, lr_0 = 4.9028e-04
Loss = 1.2907e-01, PNorm = 73.3973, GNorm = 0.8795, lr_0 = 4.8995e-04
Loss = 1.3479e-01, PNorm = 73.4097, GNorm = 0.5315, lr_0 = 4.8961e-04
Loss = 1.4092e-01, PNorm = 73.4217, GNorm = 0.5895, lr_0 = 4.8928e-04
Loss = 1.5271e-01, PNorm = 73.4369, GNorm = 0.5585, lr_0 = 4.8894e-04
Loss = 1.3633e-01, PNorm = 73.4490, GNorm = 0.6570, lr_0 = 4.8861e-04
Loss = 1.3770e-01, PNorm = 73.4680, GNorm = 0.6414, lr_0 = 4.8827e-04
Loss = 1.2522e-01, PNorm = 73.4800, GNorm = 1.3434, lr_0 = 4.8794e-04
Loss = 1.2907e-01, PNorm = 73.4883, GNorm = 0.8372, lr_0 = 4.8760e-04
Loss = 1.2819e-01, PNorm = 73.5011, GNorm = 1.5856, lr_0 = 4.8727e-04
Loss = 1.3144e-01, PNorm = 73.5110, GNorm = 0.8939, lr_0 = 4.8693e-04
Loss = 1.2753e-01, PNorm = 73.5250, GNorm = 0.6441, lr_0 = 4.8660e-04
Loss = 1.2731e-01, PNorm = 73.5353, GNorm = 0.6890, lr_0 = 4.8627e-04
Loss = 1.2612e-01, PNorm = 73.5480, GNorm = 0.4764, lr_0 = 4.8593e-04
Loss = 1.5585e-01, PNorm = 73.5575, GNorm = 0.8606, lr_0 = 4.8560e-04
Loss = 1.3894e-01, PNorm = 73.5737, GNorm = 0.7896, lr_0 = 4.8527e-04
Loss = 1.3315e-01, PNorm = 73.5901, GNorm = 0.8741, lr_0 = 4.8494e-04
Loss = 1.3198e-01, PNorm = 73.6050, GNorm = 0.6801, lr_0 = 4.8460e-04
Loss = 1.4528e-01, PNorm = 73.6177, GNorm = 0.6636, lr_0 = 4.8427e-04
Loss = 1.1912e-01, PNorm = 73.6329, GNorm = 0.9576, lr_0 = 4.8394e-04
Loss = 1.3374e-01, PNorm = 73.6453, GNorm = 0.9851, lr_0 = 4.8361e-04
Loss = 1.3318e-01, PNorm = 73.6540, GNorm = 0.7376, lr_0 = 4.8328e-04
Loss = 1.3185e-01, PNorm = 73.6690, GNorm = 0.9159, lr_0 = 4.8295e-04
Loss = 1.2928e-01, PNorm = 73.6805, GNorm = 0.5891, lr_0 = 4.8262e-04
Loss = 1.3101e-01, PNorm = 73.6949, GNorm = 1.0024, lr_0 = 4.8228e-04
Loss = 1.3246e-01, PNorm = 73.7012, GNorm = 1.1223, lr_0 = 4.8195e-04
Loss = 1.3142e-01, PNorm = 73.7177, GNorm = 0.5974, lr_0 = 4.8162e-04
Loss = 1.2243e-01, PNorm = 73.7382, GNorm = 0.4882, lr_0 = 4.8129e-04
Loss = 1.3840e-01, PNorm = 73.7534, GNorm = 0.8658, lr_0 = 4.8096e-04
Loss = 1.3488e-01, PNorm = 73.7643, GNorm = 0.7484, lr_0 = 4.8064e-04
Loss = 1.3064e-01, PNorm = 73.7733, GNorm = 0.7208, lr_0 = 4.8031e-04
Loss = 1.2931e-01, PNorm = 73.7868, GNorm = 0.8300, lr_0 = 4.7998e-04
Loss = 1.4884e-01, PNorm = 73.8009, GNorm = 0.9620, lr_0 = 4.7965e-04
Loss = 1.1000e-01, PNorm = 73.8184, GNorm = 0.7038, lr_0 = 4.7932e-04
Loss = 1.4161e-01, PNorm = 73.8323, GNorm = 1.2073, lr_0 = 4.7899e-04
Loss = 1.3003e-01, PNorm = 73.8462, GNorm = 1.1071, lr_0 = 4.7866e-04
Loss = 1.3868e-01, PNorm = 73.8547, GNorm = 0.8821, lr_0 = 4.7833e-04
Loss = 1.5582e-01, PNorm = 73.8653, GNorm = 1.0858, lr_0 = 4.7801e-04
Loss = 1.3301e-01, PNorm = 73.8787, GNorm = 0.8471, lr_0 = 4.7768e-04
Loss = 1.0941e-01, PNorm = 73.8941, GNorm = 0.6515, lr_0 = 4.7735e-04
Loss = 1.3735e-01, PNorm = 73.9083, GNorm = 0.7710, lr_0 = 4.7703e-04
Validation mae = 0.234550
Epoch 11
Loss = 1.1790e-01, PNorm = 73.9237, GNorm = 0.7750, lr_0 = 4.7670e-04
Loss = 1.2503e-01, PNorm = 73.9418, GNorm = 0.8979, lr_0 = 4.7637e-04
Loss = 1.2190e-01, PNorm = 73.9525, GNorm = 0.6971, lr_0 = 4.7605e-04
Loss = 1.0887e-01, PNorm = 73.9674, GNorm = 0.5171, lr_0 = 4.7572e-04
Loss = 1.1838e-01, PNorm = 73.9780, GNorm = 0.8766, lr_0 = 4.7539e-04
Loss = 1.1638e-01, PNorm = 73.9903, GNorm = 0.5546, lr_0 = 4.7507e-04
Loss = 1.3424e-01, PNorm = 74.0024, GNorm = 0.6863, lr_0 = 4.7474e-04
Loss = 1.2210e-01, PNorm = 74.0221, GNorm = 0.7646, lr_0 = 4.7442e-04
Loss = 1.2017e-01, PNorm = 74.0447, GNorm = 0.7685, lr_0 = 4.7409e-04
Loss = 1.2256e-01, PNorm = 74.0587, GNorm = 0.5472, lr_0 = 4.7377e-04
Loss = 1.2488e-01, PNorm = 74.0711, GNorm = 0.6037, lr_0 = 4.7344e-04
Loss = 1.3937e-01, PNorm = 74.0835, GNorm = 0.5639, lr_0 = 4.7312e-04
Loss = 1.5257e-01, PNorm = 74.0963, GNorm = 0.8797, lr_0 = 4.7279e-04
Loss = 1.1841e-01, PNorm = 74.1137, GNorm = 0.5622, lr_0 = 4.7247e-04
Loss = 1.2902e-01, PNorm = 74.1219, GNorm = 0.7527, lr_0 = 4.7215e-04
Loss = 1.1847e-01, PNorm = 74.1320, GNorm = 0.8352, lr_0 = 4.7182e-04
Loss = 1.1377e-01, PNorm = 74.1470, GNorm = 1.0825, lr_0 = 4.7150e-04
Loss = 1.1261e-01, PNorm = 74.1588, GNorm = 0.7238, lr_0 = 4.7118e-04
Loss = 1.3782e-01, PNorm = 74.1680, GNorm = 1.0238, lr_0 = 4.7085e-04
Loss = 1.2655e-01, PNorm = 74.1801, GNorm = 1.2810, lr_0 = 4.7053e-04
Loss = 1.2663e-01, PNorm = 74.1933, GNorm = 0.7955, lr_0 = 4.7021e-04
Loss = 1.3000e-01, PNorm = 74.2137, GNorm = 0.7928, lr_0 = 4.6989e-04
Loss = 1.3008e-01, PNorm = 74.2324, GNorm = 0.7339, lr_0 = 4.6957e-04
Loss = 1.1426e-01, PNorm = 74.2455, GNorm = 0.5653, lr_0 = 4.6924e-04
Loss = 1.3490e-01, PNorm = 74.2570, GNorm = 0.6081, lr_0 = 4.6892e-04
Loss = 1.2710e-01, PNorm = 74.2682, GNorm = 0.6691, lr_0 = 4.6860e-04
Loss = 1.3597e-01, PNorm = 74.2858, GNorm = 0.6661, lr_0 = 4.6828e-04
Loss = 1.3699e-01, PNorm = 74.2974, GNorm = 0.5164, lr_0 = 4.6796e-04
Loss = 1.1329e-01, PNorm = 74.3129, GNorm = 0.7113, lr_0 = 4.6764e-04
Loss = 1.2519e-01, PNorm = 74.3276, GNorm = 1.2884, lr_0 = 4.6732e-04
Loss = 1.2518e-01, PNorm = 74.3447, GNorm = 1.3856, lr_0 = 4.6700e-04
Loss = 1.1555e-01, PNorm = 74.3580, GNorm = 0.6791, lr_0 = 4.6668e-04
Loss = 1.3468e-01, PNorm = 74.3716, GNorm = 0.7124, lr_0 = 4.6636e-04
Loss = 1.1959e-01, PNorm = 74.3843, GNorm = 0.6160, lr_0 = 4.6604e-04
Loss = 1.1718e-01, PNorm = 74.3987, GNorm = 0.5936, lr_0 = 4.6572e-04
Loss = 1.1223e-01, PNorm = 74.4083, GNorm = 0.7883, lr_0 = 4.6540e-04
Loss = 1.3843e-01, PNorm = 74.4262, GNorm = 0.7755, lr_0 = 4.6508e-04
Loss = 1.2103e-01, PNorm = 74.4396, GNorm = 0.6030, lr_0 = 4.6476e-04
Loss = 1.3703e-01, PNorm = 74.4471, GNorm = 0.8173, lr_0 = 4.6445e-04
Loss = 1.2982e-01, PNorm = 74.4629, GNorm = 1.3325, lr_0 = 4.6413e-04
Loss = 1.4832e-01, PNorm = 74.4746, GNorm = 0.9398, lr_0 = 4.6381e-04
Loss = 1.2637e-01, PNorm = 74.4955, GNorm = 0.7891, lr_0 = 4.6349e-04
Loss = 1.3535e-01, PNorm = 74.5093, GNorm = 0.6976, lr_0 = 4.6317e-04
Loss = 1.3199e-01, PNorm = 74.5214, GNorm = 1.0700, lr_0 = 4.6286e-04
Loss = 1.3681e-01, PNorm = 74.5363, GNorm = 0.6573, lr_0 = 4.6254e-04
Loss = 1.4682e-01, PNorm = 74.5539, GNorm = 0.6742, lr_0 = 4.6222e-04
Loss = 1.2419e-01, PNorm = 74.5696, GNorm = 0.7731, lr_0 = 4.6191e-04
Loss = 1.2652e-01, PNorm = 74.5771, GNorm = 0.8136, lr_0 = 4.6159e-04
Loss = 1.2008e-01, PNorm = 74.5899, GNorm = 0.6106, lr_0 = 4.6127e-04
Loss = 1.2502e-01, PNorm = 74.5977, GNorm = 0.7888, lr_0 = 4.6096e-04
Loss = 1.1962e-01, PNorm = 74.6135, GNorm = 0.5646, lr_0 = 4.6064e-04
Loss = 1.2518e-01, PNorm = 74.6275, GNorm = 0.5161, lr_0 = 4.6033e-04
Loss = 1.2712e-01, PNorm = 74.6468, GNorm = 0.6885, lr_0 = 4.6001e-04
Loss = 1.1706e-01, PNorm = 74.6594, GNorm = 0.6749, lr_0 = 4.5970e-04
Loss = 1.3195e-01, PNorm = 74.6683, GNorm = 0.6571, lr_0 = 4.5938e-04
Loss = 1.1468e-01, PNorm = 74.6769, GNorm = 0.8468, lr_0 = 4.5907e-04
Loss = 1.1700e-01, PNorm = 74.6841, GNorm = 0.8962, lr_0 = 4.5875e-04
Loss = 1.2389e-01, PNorm = 74.6907, GNorm = 0.5960, lr_0 = 4.5844e-04
Loss = 1.1161e-01, PNorm = 74.7032, GNorm = 0.5753, lr_0 = 4.5812e-04
Loss = 1.2210e-01, PNorm = 74.7096, GNorm = 0.8453, lr_0 = 4.5781e-04
Loss = 1.3737e-01, PNorm = 74.7205, GNorm = 0.6080, lr_0 = 4.5750e-04
Loss = 1.2163e-01, PNorm = 74.7313, GNorm = 0.7374, lr_0 = 4.5718e-04
Loss = 1.3907e-01, PNorm = 74.7438, GNorm = 0.5290, lr_0 = 4.5687e-04
Loss = 1.1914e-01, PNorm = 74.7572, GNorm = 0.9240, lr_0 = 4.5656e-04
Loss = 1.3077e-01, PNorm = 74.7711, GNorm = 0.8403, lr_0 = 4.5624e-04
Loss = 1.1640e-01, PNorm = 74.7812, GNorm = 0.4553, lr_0 = 4.5593e-04
Loss = 1.3343e-01, PNorm = 74.7940, GNorm = 0.7976, lr_0 = 4.5562e-04
Loss = 1.2902e-01, PNorm = 74.8116, GNorm = 0.6314, lr_0 = 4.5531e-04
Loss = 1.2804e-01, PNorm = 74.8295, GNorm = 0.7608, lr_0 = 4.5499e-04
Loss = 1.1940e-01, PNorm = 74.8482, GNorm = 0.5087, lr_0 = 4.5468e-04
Loss = 1.3088e-01, PNorm = 74.8644, GNorm = 0.7144, lr_0 = 4.5437e-04
Loss = 1.3152e-01, PNorm = 74.8820, GNorm = 0.7850, lr_0 = 4.5406e-04
Loss = 1.3884e-01, PNorm = 74.8948, GNorm = 0.5840, lr_0 = 4.5375e-04
Loss = 1.1907e-01, PNorm = 74.9058, GNorm = 0.6667, lr_0 = 4.5344e-04
Loss = 1.1377e-01, PNorm = 74.9116, GNorm = 1.0881, lr_0 = 4.5313e-04
Loss = 1.3180e-01, PNorm = 74.9208, GNorm = 0.5347, lr_0 = 4.5282e-04
Loss = 1.4156e-01, PNorm = 74.9344, GNorm = 0.9654, lr_0 = 4.5251e-04
Loss = 1.3521e-01, PNorm = 74.9467, GNorm = 0.7461, lr_0 = 4.5220e-04
Loss = 1.4518e-01, PNorm = 74.9606, GNorm = 0.8269, lr_0 = 4.5189e-04
Loss = 1.3553e-01, PNorm = 74.9756, GNorm = 0.6454, lr_0 = 4.5158e-04
Loss = 1.2553e-01, PNorm = 74.9907, GNorm = 0.8366, lr_0 = 4.5127e-04
Loss = 1.2111e-01, PNorm = 74.9976, GNorm = 0.6242, lr_0 = 4.5096e-04
Loss = 1.2163e-01, PNorm = 75.0123, GNorm = 0.5933, lr_0 = 4.5065e-04
Loss = 1.2233e-01, PNorm = 75.0229, GNorm = 0.6956, lr_0 = 4.5034e-04
Loss = 1.2539e-01, PNorm = 75.0332, GNorm = 0.5230, lr_0 = 4.5003e-04
Loss = 1.2388e-01, PNorm = 75.0437, GNorm = 0.8258, lr_0 = 4.4972e-04
Loss = 1.3558e-01, PNorm = 75.0540, GNorm = 0.4562, lr_0 = 4.4942e-04
Loss = 1.4203e-01, PNorm = 75.0653, GNorm = 1.1139, lr_0 = 4.4911e-04
Loss = 1.1950e-01, PNorm = 75.0796, GNorm = 0.8215, lr_0 = 4.4880e-04
Loss = 1.2404e-01, PNorm = 75.0954, GNorm = 1.0397, lr_0 = 4.4849e-04
Loss = 1.2254e-01, PNorm = 75.1089, GNorm = 0.6106, lr_0 = 4.4819e-04
Loss = 1.1837e-01, PNorm = 75.1234, GNorm = 0.7534, lr_0 = 4.4788e-04
Loss = 1.2933e-01, PNorm = 75.1328, GNorm = 0.7495, lr_0 = 4.4757e-04
Loss = 1.3144e-01, PNorm = 75.1454, GNorm = 0.6888, lr_0 = 4.4727e-04
Loss = 1.1587e-01, PNorm = 75.1581, GNorm = 0.6668, lr_0 = 4.4696e-04
Loss = 1.2680e-01, PNorm = 75.1716, GNorm = 0.8778, lr_0 = 4.4665e-04
Loss = 1.5174e-01, PNorm = 75.1808, GNorm = 0.8196, lr_0 = 4.4635e-04
Loss = 1.1532e-01, PNorm = 75.1942, GNorm = 0.7016, lr_0 = 4.4604e-04
Loss = 1.4210e-01, PNorm = 75.2083, GNorm = 0.7347, lr_0 = 4.4574e-04
Loss = 1.2760e-01, PNorm = 75.2292, GNorm = 0.5885, lr_0 = 4.4543e-04
Loss = 1.1842e-01, PNorm = 75.2411, GNorm = 0.6998, lr_0 = 4.4513e-04
Loss = 1.2754e-01, PNorm = 75.2507, GNorm = 1.0892, lr_0 = 4.4482e-04
Loss = 1.2860e-01, PNorm = 75.2607, GNorm = 0.6749, lr_0 = 4.4452e-04
Loss = 1.1699e-01, PNorm = 75.2693, GNorm = 0.7659, lr_0 = 4.4421e-04
Loss = 1.1609e-01, PNorm = 75.2784, GNorm = 0.7099, lr_0 = 4.4391e-04
Loss = 1.3421e-01, PNorm = 75.2864, GNorm = 0.6409, lr_0 = 4.4360e-04
Loss = 1.4127e-01, PNorm = 75.2988, GNorm = 0.7003, lr_0 = 4.4330e-04
Loss = 1.4136e-01, PNorm = 75.3082, GNorm = 0.6317, lr_0 = 4.4299e-04
Loss = 1.3306e-01, PNorm = 75.3170, GNorm = 1.1630, lr_0 = 4.4269e-04
Loss = 1.2704e-01, PNorm = 75.3338, GNorm = 0.9038, lr_0 = 4.4239e-04
Loss = 1.3713e-01, PNorm = 75.3535, GNorm = 1.1000, lr_0 = 4.4209e-04
Loss = 1.4545e-01, PNorm = 75.3770, GNorm = 1.0557, lr_0 = 4.4178e-04
Loss = 1.4564e-01, PNorm = 75.3955, GNorm = 0.6164, lr_0 = 4.4148e-04
Loss = 1.3208e-01, PNorm = 75.4037, GNorm = 0.4971, lr_0 = 4.4118e-04
Loss = 1.2248e-01, PNorm = 75.4226, GNorm = 0.8054, lr_0 = 4.4088e-04
Loss = 1.3902e-01, PNorm = 75.4392, GNorm = 0.5205, lr_0 = 4.4057e-04
Loss = 1.3963e-01, PNorm = 75.4584, GNorm = 1.1570, lr_0 = 4.4027e-04
Loss = 1.3148e-01, PNorm = 75.4749, GNorm = 0.7645, lr_0 = 4.3997e-04
Loss = 1.1884e-01, PNorm = 75.4882, GNorm = 0.7791, lr_0 = 4.3967e-04
Loss = 1.4875e-01, PNorm = 75.5001, GNorm = 0.7716, lr_0 = 4.3937e-04
Validation mae = 0.234947
Epoch 12
Loss = 1.1112e-01, PNorm = 75.5160, GNorm = 0.7494, lr_0 = 4.3907e-04
Loss = 1.1963e-01, PNorm = 75.5262, GNorm = 0.8411, lr_0 = 4.3877e-04
Loss = 1.2151e-01, PNorm = 75.5427, GNorm = 0.6231, lr_0 = 4.3846e-04
Loss = 1.2460e-01, PNorm = 75.5501, GNorm = 0.7011, lr_0 = 4.3816e-04
Loss = 1.1389e-01, PNorm = 75.5620, GNorm = 0.5552, lr_0 = 4.3786e-04
Loss = 1.2172e-01, PNorm = 75.5686, GNorm = 0.7656, lr_0 = 4.3756e-04
Loss = 1.2055e-01, PNorm = 75.5806, GNorm = 1.0938, lr_0 = 4.3726e-04
Loss = 1.2873e-01, PNorm = 75.5944, GNorm = 0.7752, lr_0 = 4.3696e-04
Loss = 1.0924e-01, PNorm = 75.6064, GNorm = 0.6239, lr_0 = 4.3667e-04
Loss = 1.2745e-01, PNorm = 75.6201, GNorm = 0.6589, lr_0 = 4.3637e-04
Loss = 1.1233e-01, PNorm = 75.6286, GNorm = 0.6029, lr_0 = 4.3607e-04
Loss = 1.2230e-01, PNorm = 75.6382, GNorm = 1.1639, lr_0 = 4.3577e-04
Loss = 1.1172e-01, PNorm = 75.6510, GNorm = 0.6366, lr_0 = 4.3547e-04
Loss = 1.2822e-01, PNorm = 75.6634, GNorm = 0.7334, lr_0 = 4.3517e-04
Loss = 1.1568e-01, PNorm = 75.6772, GNorm = 0.5093, lr_0 = 4.3487e-04
Loss = 1.1911e-01, PNorm = 75.6937, GNorm = 0.8255, lr_0 = 4.3458e-04
Loss = 1.1722e-01, PNorm = 75.7092, GNorm = 0.8223, lr_0 = 4.3428e-04
Loss = 1.1946e-01, PNorm = 75.7268, GNorm = 0.4584, lr_0 = 4.3398e-04
Loss = 1.4083e-01, PNorm = 75.7332, GNorm = 0.9675, lr_0 = 4.3368e-04
Loss = 1.2450e-01, PNorm = 75.7438, GNorm = 0.7090, lr_0 = 4.3339e-04
Loss = 1.1278e-01, PNorm = 75.7511, GNorm = 0.8548, lr_0 = 4.3309e-04
Loss = 1.2088e-01, PNorm = 75.7572, GNorm = 0.8124, lr_0 = 4.3279e-04
Loss = 1.2523e-01, PNorm = 75.7650, GNorm = 0.6764, lr_0 = 4.3250e-04
Loss = 1.2944e-01, PNorm = 75.7765, GNorm = 0.7437, lr_0 = 4.3220e-04
Loss = 1.1165e-01, PNorm = 75.7891, GNorm = 0.6658, lr_0 = 4.3190e-04
Loss = 1.0824e-01, PNorm = 75.8022, GNorm = 0.6346, lr_0 = 4.3161e-04
Loss = 1.3355e-01, PNorm = 75.8110, GNorm = 0.5091, lr_0 = 4.3131e-04
Loss = 1.1113e-01, PNorm = 75.8205, GNorm = 0.5165, lr_0 = 4.3102e-04
Loss = 1.0890e-01, PNorm = 75.8305, GNorm = 0.6914, lr_0 = 4.3072e-04
Loss = 1.3163e-01, PNorm = 75.8471, GNorm = 0.8468, lr_0 = 4.3043e-04
Loss = 1.2193e-01, PNorm = 75.8553, GNorm = 0.8013, lr_0 = 4.3013e-04
Loss = 1.1317e-01, PNorm = 75.8651, GNorm = 0.8443, lr_0 = 4.2984e-04
Loss = 9.9680e-02, PNorm = 75.8713, GNorm = 0.6466, lr_0 = 4.2954e-04
Loss = 1.3856e-01, PNorm = 75.8853, GNorm = 1.6722, lr_0 = 4.2925e-04
Loss = 1.1932e-01, PNorm = 75.9038, GNorm = 0.7727, lr_0 = 4.2895e-04
Loss = 1.3768e-01, PNorm = 75.9227, GNorm = 0.9216, lr_0 = 4.2866e-04
Loss = 1.1684e-01, PNorm = 75.9352, GNorm = 0.5591, lr_0 = 4.2837e-04
Loss = 1.3395e-01, PNorm = 75.9399, GNorm = 0.9700, lr_0 = 4.2807e-04
Loss = 1.3013e-01, PNorm = 75.9430, GNorm = 0.9064, lr_0 = 4.2778e-04
Loss = 1.1845e-01, PNorm = 75.9551, GNorm = 1.2171, lr_0 = 4.2749e-04
Loss = 1.1940e-01, PNorm = 75.9655, GNorm = 0.7008, lr_0 = 4.2719e-04
Loss = 1.1725e-01, PNorm = 75.9755, GNorm = 0.6059, lr_0 = 4.2690e-04
Loss = 1.0927e-01, PNorm = 75.9868, GNorm = 0.5461, lr_0 = 4.2661e-04
Loss = 1.1931e-01, PNorm = 75.9991, GNorm = 0.5519, lr_0 = 4.2632e-04
Loss = 1.3857e-01, PNorm = 76.0079, GNorm = 0.8372, lr_0 = 4.2602e-04
Loss = 1.2518e-01, PNorm = 76.0169, GNorm = 0.6461, lr_0 = 4.2573e-04
Loss = 1.2195e-01, PNorm = 76.0294, GNorm = 0.6662, lr_0 = 4.2544e-04
Loss = 1.1928e-01, PNorm = 76.0395, GNorm = 0.6793, lr_0 = 4.2515e-04
Loss = 1.2799e-01, PNorm = 76.0515, GNorm = 0.8946, lr_0 = 4.2486e-04
Loss = 1.2821e-01, PNorm = 76.0598, GNorm = 1.1149, lr_0 = 4.2457e-04
Loss = 1.4363e-01, PNorm = 76.0703, GNorm = 1.1023, lr_0 = 4.2428e-04
Loss = 1.1115e-01, PNorm = 76.0812, GNorm = 0.9421, lr_0 = 4.2399e-04
Loss = 1.4228e-01, PNorm = 76.0973, GNorm = 0.8034, lr_0 = 4.2370e-04
Loss = 1.2698e-01, PNorm = 76.1073, GNorm = 0.6013, lr_0 = 4.2340e-04
Loss = 1.1824e-01, PNorm = 76.1182, GNorm = 0.6042, lr_0 = 4.2311e-04
Loss = 1.1656e-01, PNorm = 76.1353, GNorm = 0.6171, lr_0 = 4.2283e-04
Loss = 1.2859e-01, PNorm = 76.1483, GNorm = 0.5937, lr_0 = 4.2254e-04
Loss = 1.2364e-01, PNorm = 76.1612, GNorm = 0.5690, lr_0 = 4.2225e-04
Loss = 1.2576e-01, PNorm = 76.1819, GNorm = 0.8584, lr_0 = 4.2196e-04
Loss = 1.1888e-01, PNorm = 76.1925, GNorm = 0.5458, lr_0 = 4.2167e-04
Loss = 1.3000e-01, PNorm = 76.2002, GNorm = 1.0084, lr_0 = 4.2138e-04
Loss = 1.2693e-01, PNorm = 76.2107, GNorm = 0.8113, lr_0 = 4.2109e-04
Loss = 1.4206e-01, PNorm = 76.2263, GNorm = 0.6497, lr_0 = 4.2080e-04
Loss = 1.1801e-01, PNorm = 76.2431, GNorm = 0.5354, lr_0 = 4.2051e-04
Loss = 1.1788e-01, PNorm = 76.2551, GNorm = 0.7681, lr_0 = 4.2023e-04
Loss = 1.3694e-01, PNorm = 76.2647, GNorm = 0.7177, lr_0 = 4.1994e-04
Loss = 1.2620e-01, PNorm = 76.2767, GNorm = 0.6931, lr_0 = 4.1965e-04
Loss = 1.1896e-01, PNorm = 76.2905, GNorm = 0.6396, lr_0 = 4.1936e-04
Loss = 1.3792e-01, PNorm = 76.3001, GNorm = 0.7508, lr_0 = 4.1907e-04
Loss = 1.2299e-01, PNorm = 76.3073, GNorm = 0.6863, lr_0 = 4.1879e-04
Loss = 1.1481e-01, PNorm = 76.3174, GNorm = 0.9270, lr_0 = 4.1850e-04
Loss = 1.1867e-01, PNorm = 76.3260, GNorm = 0.6073, lr_0 = 4.1821e-04
Loss = 1.3023e-01, PNorm = 76.3331, GNorm = 0.7305, lr_0 = 4.1793e-04
Loss = 1.3707e-01, PNorm = 76.3439, GNorm = 0.6310, lr_0 = 4.1764e-04
Loss = 1.2600e-01, PNorm = 76.3556, GNorm = 0.7814, lr_0 = 4.1736e-04
Loss = 1.3218e-01, PNorm = 76.3683, GNorm = 0.6978, lr_0 = 4.1707e-04
Loss = 1.3376e-01, PNorm = 76.3753, GNorm = 0.8272, lr_0 = 4.1678e-04
Loss = 1.1325e-01, PNorm = 76.3757, GNorm = 0.5415, lr_0 = 4.1650e-04
Loss = 1.2339e-01, PNorm = 76.3859, GNorm = 0.9544, lr_0 = 4.1621e-04
Loss = 1.2554e-01, PNorm = 76.3951, GNorm = 0.8136, lr_0 = 4.1593e-04
Loss = 1.0668e-01, PNorm = 76.4114, GNorm = 0.6477, lr_0 = 4.1564e-04
Loss = 1.2573e-01, PNorm = 76.4260, GNorm = 0.6910, lr_0 = 4.1536e-04
Loss = 1.2717e-01, PNorm = 76.4337, GNorm = 0.6092, lr_0 = 4.1507e-04
Loss = 1.1152e-01, PNorm = 76.4393, GNorm = 0.8354, lr_0 = 4.1479e-04
Loss = 1.0621e-01, PNorm = 76.4490, GNorm = 0.6767, lr_0 = 4.1450e-04
Loss = 1.2202e-01, PNorm = 76.4618, GNorm = 0.4926, lr_0 = 4.1422e-04
Loss = 1.3509e-01, PNorm = 76.4729, GNorm = 0.6476, lr_0 = 4.1394e-04
Loss = 1.3547e-01, PNorm = 76.4882, GNorm = 0.6442, lr_0 = 4.1365e-04
Loss = 1.0823e-01, PNorm = 76.4975, GNorm = 0.5466, lr_0 = 4.1337e-04
Loss = 1.2362e-01, PNorm = 76.5114, GNorm = 0.8020, lr_0 = 4.1309e-04
Loss = 1.1352e-01, PNorm = 76.5201, GNorm = 0.8584, lr_0 = 4.1280e-04
Loss = 1.1320e-01, PNorm = 76.5278, GNorm = 0.7602, lr_0 = 4.1252e-04
Loss = 1.2241e-01, PNorm = 76.5386, GNorm = 1.1567, lr_0 = 4.1224e-04
Loss = 1.2693e-01, PNorm = 76.5505, GNorm = 0.7032, lr_0 = 4.1196e-04
Loss = 1.1268e-01, PNorm = 76.5618, GNorm = 0.5850, lr_0 = 4.1167e-04
Loss = 1.2391e-01, PNorm = 76.5710, GNorm = 0.6866, lr_0 = 4.1139e-04
Loss = 1.1365e-01, PNorm = 76.5808, GNorm = 0.4984, lr_0 = 4.1111e-04
Loss = 1.1042e-01, PNorm = 76.5912, GNorm = 0.9137, lr_0 = 4.1083e-04
Loss = 1.2278e-01, PNorm = 76.5996, GNorm = 0.6433, lr_0 = 4.1055e-04
Loss = 1.1013e-01, PNorm = 76.6044, GNorm = 0.9794, lr_0 = 4.1027e-04
Loss = 1.2419e-01, PNorm = 76.6111, GNorm = 0.6890, lr_0 = 4.0998e-04
Loss = 1.0125e-01, PNorm = 76.6167, GNorm = 0.6142, lr_0 = 4.0970e-04
Loss = 1.0459e-01, PNorm = 76.6248, GNorm = 0.6418, lr_0 = 4.0942e-04
Loss = 1.4131e-01, PNorm = 76.6324, GNorm = 0.8048, lr_0 = 4.0914e-04
Loss = 1.1095e-01, PNorm = 76.6439, GNorm = 0.7478, lr_0 = 4.0886e-04
Loss = 1.2906e-01, PNorm = 76.6546, GNorm = 0.6295, lr_0 = 4.0858e-04
Loss = 1.2518e-01, PNorm = 76.6681, GNorm = 0.8600, lr_0 = 4.0830e-04
Loss = 1.1928e-01, PNorm = 76.6838, GNorm = 0.9644, lr_0 = 4.0802e-04
Loss = 1.2350e-01, PNorm = 76.7003, GNorm = 0.6866, lr_0 = 4.0774e-04
Loss = 1.4852e-01, PNorm = 76.7138, GNorm = 0.6745, lr_0 = 4.0746e-04
Loss = 1.2338e-01, PNorm = 76.7292, GNorm = 0.6480, lr_0 = 4.0718e-04
Loss = 1.3689e-01, PNorm = 76.7401, GNorm = 0.6598, lr_0 = 4.0691e-04
Loss = 1.1729e-01, PNorm = 76.7500, GNorm = 0.6483, lr_0 = 4.0663e-04
Loss = 1.0997e-01, PNorm = 76.7554, GNorm = 0.8417, lr_0 = 4.0635e-04
Loss = 1.1729e-01, PNorm = 76.7618, GNorm = 0.6717, lr_0 = 4.0607e-04
Loss = 1.2155e-01, PNorm = 76.7726, GNorm = 0.7074, lr_0 = 4.0579e-04
Loss = 1.3542e-01, PNorm = 76.7806, GNorm = 1.1846, lr_0 = 4.0551e-04
Loss = 1.2795e-01, PNorm = 76.7913, GNorm = 0.8712, lr_0 = 4.0524e-04
Loss = 1.2557e-01, PNorm = 76.8005, GNorm = 0.9666, lr_0 = 4.0496e-04
Loss = 1.2070e-01, PNorm = 76.8102, GNorm = 0.6737, lr_0 = 4.0468e-04
Validation mae = 0.232913
Epoch 13
Loss = 1.3005e-01, PNorm = 76.8242, GNorm = 0.6543, lr_0 = 4.0440e-04
Loss = 1.1084e-01, PNorm = 76.8372, GNorm = 0.5450, lr_0 = 4.0413e-04
Loss = 1.0582e-01, PNorm = 76.8507, GNorm = 0.5543, lr_0 = 4.0385e-04
Loss = 1.1700e-01, PNorm = 76.8643, GNorm = 0.9406, lr_0 = 4.0357e-04
Loss = 9.9412e-02, PNorm = 76.8737, GNorm = 0.6731, lr_0 = 4.0330e-04
Loss = 1.2559e-01, PNorm = 76.8824, GNorm = 0.6843, lr_0 = 4.0302e-04
Loss = 1.0441e-01, PNorm = 76.8957, GNorm = 0.6148, lr_0 = 4.0274e-04
Loss = 1.2330e-01, PNorm = 76.9054, GNorm = 1.0146, lr_0 = 4.0247e-04
Loss = 1.3067e-01, PNorm = 76.9220, GNorm = 0.8603, lr_0 = 4.0219e-04
Loss = 1.1009e-01, PNorm = 76.9302, GNorm = 0.7814, lr_0 = 4.0192e-04
Loss = 1.0926e-01, PNorm = 76.9449, GNorm = 0.5926, lr_0 = 4.0164e-04
Loss = 1.0017e-01, PNorm = 76.9575, GNorm = 0.5704, lr_0 = 4.0137e-04
Loss = 1.1622e-01, PNorm = 76.9687, GNorm = 0.9849, lr_0 = 4.0109e-04
Loss = 1.1408e-01, PNorm = 76.9806, GNorm = 0.7251, lr_0 = 4.0082e-04
Loss = 1.0296e-01, PNorm = 76.9868, GNorm = 0.8009, lr_0 = 4.0054e-04
Loss = 1.2280e-01, PNorm = 77.0009, GNorm = 0.5860, lr_0 = 4.0027e-04
Loss = 1.1181e-01, PNorm = 77.0132, GNorm = 0.4102, lr_0 = 3.9999e-04
Loss = 1.1225e-01, PNorm = 77.0252, GNorm = 0.5626, lr_0 = 3.9972e-04
Loss = 1.0225e-01, PNorm = 77.0368, GNorm = 0.5873, lr_0 = 3.9945e-04
Loss = 1.0845e-01, PNorm = 77.0447, GNorm = 0.6145, lr_0 = 3.9917e-04
Loss = 1.0782e-01, PNorm = 77.0498, GNorm = 0.6600, lr_0 = 3.9890e-04
Loss = 1.2424e-01, PNorm = 77.0601, GNorm = 0.6476, lr_0 = 3.9863e-04
Loss = 1.4378e-01, PNorm = 77.0727, GNorm = 0.7329, lr_0 = 3.9835e-04
Loss = 1.0213e-01, PNorm = 77.0813, GNorm = 0.5598, lr_0 = 3.9808e-04
Loss = 1.0580e-01, PNorm = 77.0878, GNorm = 0.7440, lr_0 = 3.9781e-04
Loss = 1.0895e-01, PNorm = 77.0920, GNorm = 0.8486, lr_0 = 3.9753e-04
Loss = 1.2160e-01, PNorm = 77.1043, GNorm = 0.9663, lr_0 = 3.9726e-04
Loss = 1.0200e-01, PNorm = 77.1209, GNorm = 0.4550, lr_0 = 3.9699e-04
Loss = 1.1751e-01, PNorm = 77.1317, GNorm = 0.4319, lr_0 = 3.9672e-04
Loss = 1.1132e-01, PNorm = 77.1449, GNorm = 0.5348, lr_0 = 3.9645e-04
Loss = 1.1219e-01, PNorm = 77.1541, GNorm = 0.7241, lr_0 = 3.9617e-04
Loss = 1.1783e-01, PNorm = 77.1657, GNorm = 0.7080, lr_0 = 3.9590e-04
Loss = 9.2037e-02, PNorm = 77.1775, GNorm = 0.7398, lr_0 = 3.9563e-04
Loss = 1.1941e-01, PNorm = 77.1848, GNorm = 0.9466, lr_0 = 3.9536e-04
Loss = 1.1601e-01, PNorm = 77.1958, GNorm = 0.5441, lr_0 = 3.9509e-04
Loss = 1.3013e-01, PNorm = 77.2082, GNorm = 0.8447, lr_0 = 3.9482e-04
Loss = 1.1263e-01, PNorm = 77.2179, GNorm = 0.5591, lr_0 = 3.9455e-04
Loss = 1.2392e-01, PNorm = 77.2289, GNorm = 0.5235, lr_0 = 3.9428e-04
Loss = 1.2645e-01, PNorm = 77.2404, GNorm = 0.7092, lr_0 = 3.9401e-04
Loss = 1.0278e-01, PNorm = 77.2500, GNorm = 0.6567, lr_0 = 3.9374e-04
Loss = 1.2608e-01, PNorm = 77.2638, GNorm = 0.5829, lr_0 = 3.9347e-04
Loss = 1.1590e-01, PNorm = 77.2789, GNorm = 0.5621, lr_0 = 3.9320e-04
Loss = 1.0856e-01, PNorm = 77.2861, GNorm = 0.6673, lr_0 = 3.9293e-04
Loss = 1.0727e-01, PNorm = 77.2962, GNorm = 0.7090, lr_0 = 3.9266e-04
Loss = 1.0653e-01, PNorm = 77.3101, GNorm = 0.6628, lr_0 = 3.9239e-04
Loss = 1.2639e-01, PNorm = 77.3170, GNorm = 0.8672, lr_0 = 3.9212e-04
Loss = 1.2565e-01, PNorm = 77.3260, GNorm = 0.7034, lr_0 = 3.9185e-04
Loss = 1.2401e-01, PNorm = 77.3329, GNorm = 1.1685, lr_0 = 3.9159e-04
Loss = 1.2787e-01, PNorm = 77.3453, GNorm = 1.0325, lr_0 = 3.9132e-04
Loss = 1.1198e-01, PNorm = 77.3583, GNorm = 0.4916, lr_0 = 3.9105e-04
Loss = 1.1386e-01, PNorm = 77.3677, GNorm = 0.5332, lr_0 = 3.9078e-04
Loss = 1.3238e-01, PNorm = 77.3789, GNorm = 0.6687, lr_0 = 3.9051e-04
Loss = 1.2139e-01, PNorm = 77.3898, GNorm = 0.7650, lr_0 = 3.9025e-04
Loss = 1.1891e-01, PNorm = 77.3995, GNorm = 0.8133, lr_0 = 3.8998e-04
Loss = 1.1200e-01, PNorm = 77.4097, GNorm = 0.8147, lr_0 = 3.8971e-04
Loss = 1.1747e-01, PNorm = 77.4180, GNorm = 1.0602, lr_0 = 3.8945e-04
Loss = 1.1460e-01, PNorm = 77.4237, GNorm = 0.6305, lr_0 = 3.8918e-04
Loss = 1.2842e-01, PNorm = 77.4288, GNorm = 0.7756, lr_0 = 3.8891e-04
Loss = 1.0314e-01, PNorm = 77.4385, GNorm = 0.4655, lr_0 = 3.8865e-04
Loss = 1.2733e-01, PNorm = 77.4495, GNorm = 0.6927, lr_0 = 3.8838e-04
Loss = 1.1662e-01, PNorm = 77.4582, GNorm = 0.9677, lr_0 = 3.8811e-04
Loss = 1.3085e-01, PNorm = 77.4651, GNorm = 0.4632, lr_0 = 3.8785e-04
Loss = 1.1190e-01, PNorm = 77.4775, GNorm = 0.6203, lr_0 = 3.8758e-04
Loss = 1.0323e-01, PNorm = 77.4834, GNorm = 0.5427, lr_0 = 3.8732e-04
Loss = 1.1960e-01, PNorm = 77.4916, GNorm = 0.5999, lr_0 = 3.8705e-04
Loss = 1.2179e-01, PNorm = 77.5040, GNorm = 0.6759, lr_0 = 3.8679e-04
Loss = 1.2675e-01, PNorm = 77.5080, GNorm = 0.6286, lr_0 = 3.8652e-04
Loss = 1.1954e-01, PNorm = 77.5205, GNorm = 0.7683, lr_0 = 3.8626e-04
Loss = 1.3750e-01, PNorm = 77.5310, GNorm = 0.6288, lr_0 = 3.8599e-04
Loss = 1.1755e-01, PNorm = 77.5472, GNorm = 0.6589, lr_0 = 3.8573e-04
Loss = 1.1411e-01, PNorm = 77.5571, GNorm = 0.8598, lr_0 = 3.8546e-04
Loss = 1.0685e-01, PNorm = 77.5652, GNorm = 0.6437, lr_0 = 3.8520e-04
Loss = 1.2661e-01, PNorm = 77.5696, GNorm = 0.7786, lr_0 = 3.8493e-04
Loss = 1.3738e-01, PNorm = 77.5758, GNorm = 0.6560, lr_0 = 3.8467e-04
Loss = 1.1130e-01, PNorm = 77.5820, GNorm = 0.8823, lr_0 = 3.8441e-04
Loss = 1.2477e-01, PNorm = 77.5909, GNorm = 0.6819, lr_0 = 3.8414e-04
Loss = 1.0592e-01, PNorm = 77.6007, GNorm = 0.5636, lr_0 = 3.8388e-04
Loss = 1.0672e-01, PNorm = 77.6073, GNorm = 0.5212, lr_0 = 3.8362e-04
Loss = 1.1941e-01, PNorm = 77.6163, GNorm = 0.5894, lr_0 = 3.8336e-04
Loss = 1.2272e-01, PNorm = 77.6282, GNorm = 0.7045, lr_0 = 3.8309e-04
Loss = 1.0516e-01, PNorm = 77.6369, GNorm = 0.6799, lr_0 = 3.8283e-04
Loss = 1.2167e-01, PNorm = 77.6445, GNorm = 0.5547, lr_0 = 3.8257e-04
Loss = 1.2287e-01, PNorm = 77.6540, GNorm = 0.6317, lr_0 = 3.8231e-04
Loss = 1.0567e-01, PNorm = 77.6709, GNorm = 0.4580, lr_0 = 3.8204e-04
Loss = 1.3116e-01, PNorm = 77.6812, GNorm = 0.6930, lr_0 = 3.8178e-04
Loss = 1.1611e-01, PNorm = 77.6872, GNorm = 0.5839, lr_0 = 3.8152e-04
Loss = 1.1308e-01, PNorm = 77.6996, GNorm = 0.7575, lr_0 = 3.8126e-04
Loss = 1.1115e-01, PNorm = 77.7062, GNorm = 0.6542, lr_0 = 3.8100e-04
Loss = 1.0260e-01, PNorm = 77.7145, GNorm = 0.6955, lr_0 = 3.8074e-04
Loss = 1.1040e-01, PNorm = 77.7206, GNorm = 0.7678, lr_0 = 3.8048e-04
Loss = 1.3700e-01, PNorm = 77.7286, GNorm = 0.6461, lr_0 = 3.8022e-04
Loss = 1.1104e-01, PNorm = 77.7413, GNorm = 0.6372, lr_0 = 3.7995e-04
Loss = 1.2450e-01, PNorm = 77.7468, GNorm = 0.5837, lr_0 = 3.7969e-04
Loss = 1.3299e-01, PNorm = 77.7603, GNorm = 0.6114, lr_0 = 3.7943e-04
Loss = 1.2743e-01, PNorm = 77.7723, GNorm = 0.5532, lr_0 = 3.7917e-04
Loss = 1.2780e-01, PNorm = 77.7869, GNorm = 0.6058, lr_0 = 3.7891e-04
Loss = 1.2022e-01, PNorm = 77.7976, GNorm = 0.6524, lr_0 = 3.7866e-04
Loss = 1.1264e-01, PNorm = 77.8054, GNorm = 0.4521, lr_0 = 3.7840e-04
Loss = 1.2362e-01, PNorm = 77.8167, GNorm = 0.4986, lr_0 = 3.7814e-04
Loss = 1.1895e-01, PNorm = 77.8197, GNorm = 0.9533, lr_0 = 3.7788e-04
Loss = 1.3574e-01, PNorm = 77.8296, GNorm = 0.6790, lr_0 = 3.7762e-04
Loss = 1.1541e-01, PNorm = 77.8372, GNorm = 0.8720, lr_0 = 3.7736e-04
Loss = 1.2495e-01, PNorm = 77.8419, GNorm = 0.9659, lr_0 = 3.7710e-04
Loss = 1.1576e-01, PNorm = 77.8517, GNorm = 1.0787, lr_0 = 3.7684e-04
Loss = 1.2877e-01, PNorm = 77.8538, GNorm = 0.5622, lr_0 = 3.7659e-04
Loss = 1.0263e-01, PNorm = 77.8573, GNorm = 0.6798, lr_0 = 3.7633e-04
Loss = 1.2225e-01, PNorm = 77.8630, GNorm = 1.2687, lr_0 = 3.7607e-04
Loss = 1.3352e-01, PNorm = 77.8725, GNorm = 0.6777, lr_0 = 3.7581e-04
Loss = 1.2536e-01, PNorm = 77.8882, GNorm = 0.7760, lr_0 = 3.7555e-04
Loss = 1.0956e-01, PNorm = 77.9004, GNorm = 0.7834, lr_0 = 3.7530e-04
Loss = 1.1507e-01, PNorm = 77.9074, GNorm = 0.5985, lr_0 = 3.7504e-04
Loss = 1.2234e-01, PNorm = 77.9102, GNorm = 0.8101, lr_0 = 3.7478e-04
Loss = 1.1307e-01, PNorm = 77.9165, GNorm = 0.8005, lr_0 = 3.7453e-04
Loss = 1.2971e-01, PNorm = 77.9258, GNorm = 0.6939, lr_0 = 3.7427e-04
Loss = 1.2856e-01, PNorm = 77.9360, GNorm = 0.4447, lr_0 = 3.7401e-04
Loss = 1.2993e-01, PNorm = 77.9452, GNorm = 0.6232, lr_0 = 3.7376e-04
Loss = 1.1296e-01, PNorm = 77.9564, GNorm = 0.6471, lr_0 = 3.7350e-04
Loss = 1.0565e-01, PNorm = 77.9648, GNorm = 0.5947, lr_0 = 3.7325e-04
Loss = 1.2510e-01, PNorm = 77.9687, GNorm = 1.2008, lr_0 = 3.7299e-04
Loss = 9.9717e-02, PNorm = 77.9742, GNorm = 0.7170, lr_0 = 3.7273e-04
Validation mae = 0.231604
Epoch 14
Loss = 1.0314e-01, PNorm = 77.9872, GNorm = 0.7237, lr_0 = 3.7248e-04
Loss = 1.0834e-01, PNorm = 77.9983, GNorm = 0.5451, lr_0 = 3.7222e-04
Loss = 1.0706e-01, PNorm = 78.0101, GNorm = 0.6725, lr_0 = 3.7197e-04
Loss = 1.1922e-01, PNorm = 78.0156, GNorm = 0.8206, lr_0 = 3.7171e-04
Loss = 1.1094e-01, PNorm = 78.0249, GNorm = 1.1622, lr_0 = 3.7146e-04
Loss = 1.2668e-01, PNorm = 78.0358, GNorm = 0.5083, lr_0 = 3.7120e-04
Loss = 1.0612e-01, PNorm = 78.0444, GNorm = 0.7004, lr_0 = 3.7095e-04
Loss = 1.0679e-01, PNorm = 78.0545, GNorm = 0.5567, lr_0 = 3.7070e-04
Loss = 1.0184e-01, PNorm = 78.0641, GNorm = 0.6619, lr_0 = 3.7044e-04
Loss = 9.6445e-02, PNorm = 78.0731, GNorm = 0.5378, lr_0 = 3.7019e-04
Loss = 1.1133e-01, PNorm = 78.0812, GNorm = 0.6798, lr_0 = 3.6993e-04
Loss = 1.0972e-01, PNorm = 78.0875, GNorm = 0.8247, lr_0 = 3.6968e-04
Loss = 1.0453e-01, PNorm = 78.0942, GNorm = 0.9322, lr_0 = 3.6943e-04
Loss = 1.0470e-01, PNorm = 78.1046, GNorm = 0.5215, lr_0 = 3.6917e-04
Loss = 9.9752e-02, PNorm = 78.1120, GNorm = 0.4643, lr_0 = 3.6892e-04
Loss = 1.2093e-01, PNorm = 78.1210, GNorm = 0.7350, lr_0 = 3.6867e-04
Loss = 9.8167e-02, PNorm = 78.1297, GNorm = 0.7343, lr_0 = 3.6842e-04
Loss = 9.8310e-02, PNorm = 78.1428, GNorm = 0.5731, lr_0 = 3.6816e-04
Loss = 1.2262e-01, PNorm = 78.1539, GNorm = 0.6529, lr_0 = 3.6791e-04
Loss = 1.0946e-01, PNorm = 78.1648, GNorm = 0.5827, lr_0 = 3.6766e-04
Loss = 1.2313e-01, PNorm = 78.1749, GNorm = 0.8040, lr_0 = 3.6741e-04
Loss = 1.2796e-01, PNorm = 78.1818, GNorm = 0.5530, lr_0 = 3.6716e-04
Loss = 1.1263e-01, PNorm = 78.1861, GNorm = 0.7074, lr_0 = 3.6690e-04
Loss = 1.0945e-01, PNorm = 78.1943, GNorm = 0.6259, lr_0 = 3.6665e-04
Loss = 1.1264e-01, PNorm = 78.2025, GNorm = 0.5760, lr_0 = 3.6640e-04
Loss = 1.0290e-01, PNorm = 78.2062, GNorm = 0.7288, lr_0 = 3.6615e-04
Loss = 1.1398e-01, PNorm = 78.2129, GNorm = 0.4762, lr_0 = 3.6590e-04
Loss = 1.0676e-01, PNorm = 78.2195, GNorm = 0.8617, lr_0 = 3.6565e-04
Loss = 1.0914e-01, PNorm = 78.2296, GNorm = 0.5401, lr_0 = 3.6540e-04
Loss = 1.0451e-01, PNorm = 78.2444, GNorm = 0.6401, lr_0 = 3.6515e-04
Loss = 1.1763e-01, PNorm = 78.2506, GNorm = 0.6009, lr_0 = 3.6490e-04
Loss = 1.0136e-01, PNorm = 78.2582, GNorm = 0.6945, lr_0 = 3.6465e-04
Loss = 1.0560e-01, PNorm = 78.2667, GNorm = 1.1749, lr_0 = 3.6440e-04
Loss = 1.3149e-01, PNorm = 78.2751, GNorm = 0.5724, lr_0 = 3.6415e-04
Loss = 1.0347e-01, PNorm = 78.2859, GNorm = 0.7766, lr_0 = 3.6390e-04
Loss = 1.0752e-01, PNorm = 78.2945, GNorm = 0.7314, lr_0 = 3.6365e-04
Loss = 1.1568e-01, PNorm = 78.3087, GNorm = 0.8220, lr_0 = 3.6340e-04
Loss = 1.1136e-01, PNorm = 78.3192, GNorm = 0.9594, lr_0 = 3.6315e-04
Loss = 1.1061e-01, PNorm = 78.3257, GNorm = 0.5560, lr_0 = 3.6290e-04
Loss = 1.0777e-01, PNorm = 78.3360, GNorm = 0.6606, lr_0 = 3.6266e-04
Loss = 1.2125e-01, PNorm = 78.3396, GNorm = 0.6191, lr_0 = 3.6241e-04
Loss = 1.1444e-01, PNorm = 78.3501, GNorm = 0.5517, lr_0 = 3.6216e-04
Loss = 1.0443e-01, PNorm = 78.3562, GNorm = 0.6396, lr_0 = 3.6191e-04
Loss = 1.0999e-01, PNorm = 78.3645, GNorm = 0.7848, lr_0 = 3.6166e-04
Loss = 1.2594e-01, PNorm = 78.3779, GNorm = 1.2515, lr_0 = 3.6141e-04
Loss = 1.1174e-01, PNorm = 78.3917, GNorm = 0.6142, lr_0 = 3.6117e-04
Loss = 1.0099e-01, PNorm = 78.4036, GNorm = 0.5320, lr_0 = 3.6092e-04
Loss = 1.0921e-01, PNorm = 78.4103, GNorm = 0.4845, lr_0 = 3.6067e-04
Loss = 1.3116e-01, PNorm = 78.4168, GNorm = 0.6947, lr_0 = 3.6043e-04
Loss = 1.0287e-01, PNorm = 78.4225, GNorm = 0.6175, lr_0 = 3.6018e-04
Loss = 1.1766e-01, PNorm = 78.4315, GNorm = 0.7603, lr_0 = 3.5993e-04
Loss = 1.1381e-01, PNorm = 78.4378, GNorm = 0.7846, lr_0 = 3.5969e-04
Loss = 1.1696e-01, PNorm = 78.4457, GNorm = 0.4634, lr_0 = 3.5944e-04
Loss = 1.1404e-01, PNorm = 78.4541, GNorm = 0.5199, lr_0 = 3.5919e-04
Loss = 1.1044e-01, PNorm = 78.4613, GNorm = 0.6561, lr_0 = 3.5895e-04
Loss = 1.0952e-01, PNorm = 78.4711, GNorm = 0.5649, lr_0 = 3.5870e-04
Loss = 1.0117e-01, PNorm = 78.4812, GNorm = 0.7148, lr_0 = 3.5845e-04
Loss = 1.2711e-01, PNorm = 78.4889, GNorm = 0.6012, lr_0 = 3.5821e-04
Loss = 1.1715e-01, PNorm = 78.4982, GNorm = 0.6528, lr_0 = 3.5796e-04
Loss = 1.2244e-01, PNorm = 78.5084, GNorm = 0.6751, lr_0 = 3.5772e-04
Loss = 1.0713e-01, PNorm = 78.5156, GNorm = 0.5174, lr_0 = 3.5747e-04
Loss = 1.0391e-01, PNorm = 78.5248, GNorm = 0.7096, lr_0 = 3.5723e-04
Loss = 1.0664e-01, PNorm = 78.5309, GNorm = 0.5067, lr_0 = 3.5698e-04
Loss = 1.1066e-01, PNorm = 78.5392, GNorm = 0.5783, lr_0 = 3.5674e-04
Loss = 1.1760e-01, PNorm = 78.5487, GNorm = 0.5646, lr_0 = 3.5650e-04
Loss = 1.0544e-01, PNorm = 78.5592, GNorm = 1.1903, lr_0 = 3.5625e-04
Loss = 1.2549e-01, PNorm = 78.5712, GNorm = 0.6803, lr_0 = 3.5601e-04
Loss = 1.0543e-01, PNorm = 78.5797, GNorm = 1.1910, lr_0 = 3.5576e-04
Loss = 1.1193e-01, PNorm = 78.5838, GNorm = 0.6956, lr_0 = 3.5552e-04
Loss = 1.0865e-01, PNorm = 78.5893, GNorm = 0.6053, lr_0 = 3.5528e-04
Loss = 1.2234e-01, PNorm = 78.5961, GNorm = 0.5296, lr_0 = 3.5503e-04
Loss = 1.1796e-01, PNorm = 78.6039, GNorm = 0.6096, lr_0 = 3.5479e-04
Loss = 1.3302e-01, PNorm = 78.6138, GNorm = 0.6652, lr_0 = 3.5455e-04
Loss = 1.0215e-01, PNorm = 78.6216, GNorm = 0.6256, lr_0 = 3.5430e-04
Loss = 1.0904e-01, PNorm = 78.6335, GNorm = 0.5699, lr_0 = 3.5406e-04
Loss = 1.0949e-01, PNorm = 78.6409, GNorm = 0.4296, lr_0 = 3.5382e-04
Loss = 1.3363e-01, PNorm = 78.6474, GNorm = 0.9683, lr_0 = 3.5358e-04
Loss = 1.1015e-01, PNorm = 78.6523, GNorm = 0.6545, lr_0 = 3.5333e-04
Loss = 1.1477e-01, PNorm = 78.6586, GNorm = 0.6236, lr_0 = 3.5309e-04
Loss = 1.0339e-01, PNorm = 78.6651, GNorm = 0.6415, lr_0 = 3.5285e-04
Loss = 1.2156e-01, PNorm = 78.6767, GNorm = 0.6337, lr_0 = 3.5261e-04
Loss = 1.0477e-01, PNorm = 78.6862, GNorm = 0.8681, lr_0 = 3.5237e-04
Loss = 1.0656e-01, PNorm = 78.6945, GNorm = 0.6997, lr_0 = 3.5212e-04
Loss = 1.2314e-01, PNorm = 78.7061, GNorm = 0.6111, lr_0 = 3.5188e-04
Loss = 1.0226e-01, PNorm = 78.7157, GNorm = 0.5771, lr_0 = 3.5164e-04
Loss = 1.1858e-01, PNorm = 78.7184, GNorm = 0.6751, lr_0 = 3.5140e-04
Loss = 1.2784e-01, PNorm = 78.7282, GNorm = 0.7100, lr_0 = 3.5116e-04
Loss = 1.2022e-01, PNorm = 78.7368, GNorm = 0.7797, lr_0 = 3.5092e-04
Loss = 1.1700e-01, PNorm = 78.7474, GNorm = 1.2832, lr_0 = 3.5068e-04
Loss = 1.2484e-01, PNorm = 78.7591, GNorm = 0.6508, lr_0 = 3.5044e-04
Loss = 1.0037e-01, PNorm = 78.7680, GNorm = 0.7714, lr_0 = 3.5020e-04
Loss = 1.1311e-01, PNorm = 78.7762, GNorm = 0.6428, lr_0 = 3.4996e-04
Loss = 1.0770e-01, PNorm = 78.7844, GNorm = 0.5231, lr_0 = 3.4972e-04
Loss = 1.0568e-01, PNorm = 78.7908, GNorm = 1.0064, lr_0 = 3.4948e-04
Loss = 1.1298e-01, PNorm = 78.7948, GNorm = 0.9583, lr_0 = 3.4924e-04
Loss = 1.1535e-01, PNorm = 78.8059, GNorm = 0.7428, lr_0 = 3.4900e-04
Loss = 1.1423e-01, PNorm = 78.8162, GNorm = 1.4017, lr_0 = 3.4876e-04
Loss = 1.1834e-01, PNorm = 78.8237, GNorm = 0.8207, lr_0 = 3.4852e-04
Loss = 1.2714e-01, PNorm = 78.8333, GNorm = 0.6786, lr_0 = 3.4828e-04
Loss = 9.7352e-02, PNorm = 78.8429, GNorm = 0.6583, lr_0 = 3.4805e-04
Loss = 1.0120e-01, PNorm = 78.8478, GNorm = 0.7246, lr_0 = 3.4781e-04
Loss = 1.0203e-01, PNorm = 78.8532, GNorm = 0.5732, lr_0 = 3.4757e-04
Loss = 1.1621e-01, PNorm = 78.8590, GNorm = 0.5843, lr_0 = 3.4733e-04
Loss = 1.0557e-01, PNorm = 78.8683, GNorm = 0.4487, lr_0 = 3.4709e-04
Loss = 1.1677e-01, PNorm = 78.8790, GNorm = 0.8964, lr_0 = 3.4686e-04
Loss = 1.2314e-01, PNorm = 78.8819, GNorm = 0.6873, lr_0 = 3.4662e-04
Loss = 1.1850e-01, PNorm = 78.8897, GNorm = 0.7187, lr_0 = 3.4638e-04
Loss = 1.2019e-01, PNorm = 78.8978, GNorm = 0.7075, lr_0 = 3.4614e-04
Loss = 9.9755e-02, PNorm = 78.9033, GNorm = 0.8128, lr_0 = 3.4591e-04
Loss = 1.2852e-01, PNorm = 78.9056, GNorm = 0.6365, lr_0 = 3.4567e-04
Loss = 1.3038e-01, PNorm = 78.9138, GNorm = 0.6342, lr_0 = 3.4543e-04
Loss = 1.1726e-01, PNorm = 78.9194, GNorm = 0.6597, lr_0 = 3.4520e-04
Loss = 1.0883e-01, PNorm = 78.9248, GNorm = 0.6332, lr_0 = 3.4496e-04
Loss = 1.0397e-01, PNorm = 78.9319, GNorm = 0.7257, lr_0 = 3.4472e-04
Loss = 1.2091e-01, PNorm = 78.9394, GNorm = 0.6805, lr_0 = 3.4449e-04
Loss = 1.1114e-01, PNorm = 78.9475, GNorm = 0.8314, lr_0 = 3.4425e-04
Loss = 1.0750e-01, PNorm = 78.9549, GNorm = 0.6275, lr_0 = 3.4402e-04
Loss = 1.1532e-01, PNorm = 78.9650, GNorm = 0.7196, lr_0 = 3.4378e-04
Loss = 1.1373e-01, PNorm = 78.9724, GNorm = 0.7604, lr_0 = 3.4354e-04
Loss = 1.0428e-01, PNorm = 78.9765, GNorm = 0.4512, lr_0 = 3.4331e-04
Validation mae = 0.228951
Epoch 15
Loss = 1.0857e-01, PNorm = 78.9838, GNorm = 0.5657, lr_0 = 3.4307e-04
Loss = 1.0341e-01, PNorm = 78.9922, GNorm = 0.6379, lr_0 = 3.4284e-04
Loss = 1.0641e-01, PNorm = 79.0009, GNorm = 0.8690, lr_0 = 3.4260e-04
Loss = 1.1820e-01, PNorm = 79.0075, GNorm = 0.6422, lr_0 = 3.4237e-04
Loss = 1.1592e-01, PNorm = 79.0168, GNorm = 0.4781, lr_0 = 3.4213e-04
Loss = 1.0402e-01, PNorm = 79.0269, GNorm = 0.6656, lr_0 = 3.4190e-04
Loss = 9.3331e-02, PNorm = 79.0362, GNorm = 0.5900, lr_0 = 3.4167e-04
Loss = 1.0012e-01, PNorm = 79.0459, GNorm = 0.4807, lr_0 = 3.4143e-04
Loss = 9.8756e-02, PNorm = 79.0516, GNorm = 0.7457, lr_0 = 3.4120e-04
Loss = 9.2645e-02, PNorm = 79.0559, GNorm = 0.6648, lr_0 = 3.4096e-04
Loss = 9.3284e-02, PNorm = 79.0651, GNorm = 0.5258, lr_0 = 3.4073e-04
Loss = 1.0989e-01, PNorm = 79.0742, GNorm = 0.5374, lr_0 = 3.4050e-04
Loss = 1.0962e-01, PNorm = 79.0815, GNorm = 0.6169, lr_0 = 3.4026e-04
Loss = 1.0917e-01, PNorm = 79.0909, GNorm = 0.5274, lr_0 = 3.4003e-04
Loss = 1.0854e-01, PNorm = 79.0992, GNorm = 0.5684, lr_0 = 3.3980e-04
Loss = 1.0377e-01, PNorm = 79.1078, GNorm = 0.9181, lr_0 = 3.3956e-04
Loss = 1.0521e-01, PNorm = 79.1138, GNorm = 0.6153, lr_0 = 3.3933e-04
Loss = 1.1727e-01, PNorm = 79.1206, GNorm = 0.9180, lr_0 = 3.3910e-04
Loss = 1.3125e-01, PNorm = 79.1267, GNorm = 0.7690, lr_0 = 3.3887e-04
Loss = 1.0929e-01, PNorm = 79.1346, GNorm = 0.7981, lr_0 = 3.3864e-04
Loss = 1.0881e-01, PNorm = 79.1400, GNorm = 0.6286, lr_0 = 3.3840e-04
Loss = 1.0733e-01, PNorm = 79.1523, GNorm = 0.6441, lr_0 = 3.3817e-04
Loss = 1.3296e-01, PNorm = 79.1632, GNorm = 0.9476, lr_0 = 3.3794e-04
Loss = 1.0528e-01, PNorm = 79.1719, GNorm = 0.5854, lr_0 = 3.3771e-04
Loss = 1.0411e-01, PNorm = 79.1790, GNorm = 0.7030, lr_0 = 3.3748e-04
Loss = 1.1817e-01, PNorm = 79.1887, GNorm = 1.2008, lr_0 = 3.3725e-04
Loss = 1.2546e-01, PNorm = 79.1959, GNorm = 0.8295, lr_0 = 3.3701e-04
Loss = 1.0487e-01, PNorm = 79.2063, GNorm = 0.6130, lr_0 = 3.3678e-04
Loss = 1.0867e-01, PNorm = 79.2120, GNorm = 0.6936, lr_0 = 3.3655e-04
Loss = 1.0588e-01, PNorm = 79.2192, GNorm = 1.1078, lr_0 = 3.3632e-04
Loss = 1.0129e-01, PNorm = 79.2271, GNorm = 0.6846, lr_0 = 3.3609e-04
Loss = 1.1627e-01, PNorm = 79.2341, GNorm = 0.6162, lr_0 = 3.3586e-04
Loss = 1.1778e-01, PNorm = 79.2442, GNorm = 0.9925, lr_0 = 3.3563e-04
Loss = 1.0227e-01, PNorm = 79.2499, GNorm = 0.4982, lr_0 = 3.3540e-04
Loss = 1.0329e-01, PNorm = 79.2570, GNorm = 0.5464, lr_0 = 3.3517e-04
Loss = 1.1653e-01, PNorm = 79.2679, GNorm = 0.5801, lr_0 = 3.3494e-04
Loss = 1.1183e-01, PNorm = 79.2780, GNorm = 0.5661, lr_0 = 3.3471e-04
Loss = 9.1048e-02, PNorm = 79.2853, GNorm = 0.7221, lr_0 = 3.3448e-04
Loss = 9.8960e-02, PNorm = 79.2902, GNorm = 0.8515, lr_0 = 3.3425e-04
Loss = 9.3445e-02, PNorm = 79.2957, GNorm = 0.7265, lr_0 = 3.3403e-04
Loss = 1.0771e-01, PNorm = 79.2999, GNorm = 0.6967, lr_0 = 3.3380e-04
Loss = 1.0677e-01, PNorm = 79.3062, GNorm = 0.6989, lr_0 = 3.3357e-04
Loss = 1.0700e-01, PNorm = 79.3098, GNorm = 0.5953, lr_0 = 3.3334e-04
Loss = 1.1327e-01, PNorm = 79.3141, GNorm = 0.7278, lr_0 = 3.3311e-04
Loss = 9.4667e-02, PNorm = 79.3198, GNorm = 0.5373, lr_0 = 3.3288e-04
Loss = 1.1558e-01, PNorm = 79.3253, GNorm = 0.7447, lr_0 = 3.3265e-04
Loss = 1.2723e-01, PNorm = 79.3367, GNorm = 0.7092, lr_0 = 3.3243e-04
Loss = 9.7204e-02, PNorm = 79.3466, GNorm = 0.6398, lr_0 = 3.3220e-04
Loss = 1.2761e-01, PNorm = 79.3578, GNorm = 0.5675, lr_0 = 3.3197e-04
Loss = 9.3805e-02, PNorm = 79.3658, GNorm = 0.5160, lr_0 = 3.3174e-04
Loss = 1.1111e-01, PNorm = 79.3736, GNorm = 0.7415, lr_0 = 3.3152e-04
Loss = 1.1624e-01, PNorm = 79.3812, GNorm = 0.7167, lr_0 = 3.3129e-04
Loss = 1.0185e-01, PNorm = 79.3875, GNorm = 0.7316, lr_0 = 3.3106e-04
Loss = 1.1306e-01, PNorm = 79.3956, GNorm = 0.7287, lr_0 = 3.3084e-04
Loss = 1.1892e-01, PNorm = 79.4037, GNorm = 0.5703, lr_0 = 3.3061e-04
Loss = 1.0888e-01, PNorm = 79.4127, GNorm = 0.6415, lr_0 = 3.3038e-04
Loss = 1.0786e-01, PNorm = 79.4212, GNorm = 0.6630, lr_0 = 3.3016e-04
Loss = 1.1487e-01, PNorm = 79.4300, GNorm = 0.7295, lr_0 = 3.2993e-04
Loss = 9.7953e-02, PNorm = 79.4377, GNorm = 0.5467, lr_0 = 3.2970e-04
Loss = 1.0855e-01, PNorm = 79.4449, GNorm = 0.9316, lr_0 = 3.2948e-04
Loss = 9.2772e-02, PNorm = 79.4538, GNorm = 0.5571, lr_0 = 3.2925e-04
Loss = 1.1122e-01, PNorm = 79.4594, GNorm = 0.7378, lr_0 = 3.2903e-04
Loss = 1.1519e-01, PNorm = 79.4677, GNorm = 0.7295, lr_0 = 3.2880e-04
Loss = 1.0058e-01, PNorm = 79.4762, GNorm = 0.5416, lr_0 = 3.2858e-04
Loss = 9.0429e-02, PNorm = 79.4848, GNorm = 0.9297, lr_0 = 3.2835e-04
Loss = 1.1632e-01, PNorm = 79.4925, GNorm = 0.8058, lr_0 = 3.2813e-04
Loss = 1.0179e-01, PNorm = 79.5015, GNorm = 0.5539, lr_0 = 3.2790e-04
Loss = 1.1593e-01, PNorm = 79.5111, GNorm = 0.5022, lr_0 = 3.2768e-04
Loss = 1.0485e-01, PNorm = 79.5188, GNorm = 0.6350, lr_0 = 3.2745e-04
Loss = 9.7292e-02, PNorm = 79.5268, GNorm = 0.5469, lr_0 = 3.2723e-04
Loss = 1.1127e-01, PNorm = 79.5359, GNorm = 0.6239, lr_0 = 3.2700e-04
Loss = 1.1150e-01, PNorm = 79.5451, GNorm = 0.6490, lr_0 = 3.2678e-04
Loss = 9.1561e-02, PNorm = 79.5518, GNorm = 0.5654, lr_0 = 3.2656e-04
Loss = 1.1053e-01, PNorm = 79.5592, GNorm = 0.9009, lr_0 = 3.2633e-04
Loss = 1.1272e-01, PNorm = 79.5723, GNorm = 0.8207, lr_0 = 3.2611e-04
Loss = 1.1935e-01, PNorm = 79.5804, GNorm = 0.8816, lr_0 = 3.2589e-04
Loss = 1.1677e-01, PNorm = 79.5887, GNorm = 0.5757, lr_0 = 3.2566e-04
Loss = 1.1033e-01, PNorm = 79.5954, GNorm = 0.5449, lr_0 = 3.2544e-04
Loss = 9.6821e-02, PNorm = 79.5995, GNorm = 0.9096, lr_0 = 3.2522e-04
Loss = 9.9452e-02, PNorm = 79.6097, GNorm = 0.7704, lr_0 = 3.2499e-04
Loss = 1.0229e-01, PNorm = 79.6164, GNorm = 0.8202, lr_0 = 3.2477e-04
Loss = 1.0220e-01, PNorm = 79.6245, GNorm = 0.5059, lr_0 = 3.2455e-04
Loss = 1.0256e-01, PNorm = 79.6343, GNorm = 0.7485, lr_0 = 3.2433e-04
Loss = 1.1149e-01, PNorm = 79.6385, GNorm = 0.8268, lr_0 = 3.2410e-04
Loss = 1.0760e-01, PNorm = 79.6476, GNorm = 0.6006, lr_0 = 3.2388e-04
Loss = 1.0851e-01, PNorm = 79.6545, GNorm = 0.7406, lr_0 = 3.2366e-04
Loss = 1.0721e-01, PNorm = 79.6624, GNorm = 0.7514, lr_0 = 3.2344e-04
Loss = 1.0797e-01, PNorm = 79.6697, GNorm = 0.6105, lr_0 = 3.2322e-04
Loss = 1.1728e-01, PNorm = 79.6770, GNorm = 0.6151, lr_0 = 3.2300e-04
Loss = 1.1634e-01, PNorm = 79.6859, GNorm = 0.5217, lr_0 = 3.2277e-04
Loss = 1.0491e-01, PNorm = 79.6954, GNorm = 0.5660, lr_0 = 3.2255e-04
Loss = 1.0002e-01, PNorm = 79.7064, GNorm = 0.6190, lr_0 = 3.2233e-04
Loss = 1.0936e-01, PNorm = 79.7127, GNorm = 0.4838, lr_0 = 3.2211e-04
Loss = 1.1282e-01, PNorm = 79.7181, GNorm = 0.6178, lr_0 = 3.2189e-04
Loss = 9.1815e-02, PNorm = 79.7259, GNorm = 0.7943, lr_0 = 3.2167e-04
Loss = 1.0457e-01, PNorm = 79.7333, GNorm = 0.8119, lr_0 = 3.2145e-04
Loss = 1.0677e-01, PNorm = 79.7408, GNorm = 1.0275, lr_0 = 3.2123e-04
Loss = 1.0729e-01, PNorm = 79.7457, GNorm = 0.5836, lr_0 = 3.2101e-04
Loss = 1.1396e-01, PNorm = 79.7555, GNorm = 0.7045, lr_0 = 3.2079e-04
Loss = 1.3003e-01, PNorm = 79.7614, GNorm = 1.0422, lr_0 = 3.2057e-04
Loss = 1.0753e-01, PNorm = 79.7680, GNorm = 0.7651, lr_0 = 3.2035e-04
Loss = 1.1004e-01, PNorm = 79.7743, GNorm = 0.6436, lr_0 = 3.2013e-04
Loss = 1.0339e-01, PNorm = 79.7768, GNorm = 0.5845, lr_0 = 3.1991e-04
Loss = 1.0314e-01, PNorm = 79.7812, GNorm = 0.5792, lr_0 = 3.1969e-04
Loss = 1.1143e-01, PNorm = 79.7862, GNorm = 0.9015, lr_0 = 3.1947e-04
Loss = 1.0918e-01, PNorm = 79.7941, GNorm = 0.6418, lr_0 = 3.1925e-04
Loss = 1.0466e-01, PNorm = 79.8027, GNorm = 0.8856, lr_0 = 3.1904e-04
Loss = 1.1970e-01, PNorm = 79.8100, GNorm = 0.6241, lr_0 = 3.1882e-04
Loss = 1.1215e-01, PNorm = 79.8186, GNorm = 0.7371, lr_0 = 3.1860e-04
Loss = 1.0290e-01, PNorm = 79.8229, GNorm = 0.5669, lr_0 = 3.1838e-04
Loss = 1.1618e-01, PNorm = 79.8301, GNorm = 0.7711, lr_0 = 3.1816e-04
Loss = 1.0903e-01, PNorm = 79.8383, GNorm = 0.7766, lr_0 = 3.1794e-04
Loss = 1.1661e-01, PNorm = 79.8419, GNorm = 0.5620, lr_0 = 3.1773e-04
Loss = 1.0553e-01, PNorm = 79.8501, GNorm = 0.7024, lr_0 = 3.1751e-04
Loss = 1.2685e-01, PNorm = 79.8652, GNorm = 0.8528, lr_0 = 3.1729e-04
Loss = 9.7797e-02, PNorm = 79.8741, GNorm = 0.9218, lr_0 = 3.1707e-04
Loss = 1.2454e-01, PNorm = 79.8800, GNorm = 0.7159, lr_0 = 3.1686e-04
Loss = 8.6537e-02, PNorm = 79.8864, GNorm = 0.6975, lr_0 = 3.1664e-04
Loss = 1.1703e-01, PNorm = 79.8877, GNorm = 0.6213, lr_0 = 3.1642e-04
Loss = 1.1674e-01, PNorm = 79.8916, GNorm = 0.6239, lr_0 = 3.1621e-04
Validation mae = 0.237774
Epoch 16
Loss = 9.8621e-02, PNorm = 79.9005, GNorm = 0.6441, lr_0 = 3.1599e-04
Loss = 1.1202e-01, PNorm = 79.9088, GNorm = 0.7747, lr_0 = 3.1577e-04
Loss = 8.9984e-02, PNorm = 79.9162, GNorm = 0.5943, lr_0 = 3.1556e-04
Loss = 1.1639e-01, PNorm = 79.9234, GNorm = 0.4845, lr_0 = 3.1534e-04
Loss = 1.1029e-01, PNorm = 79.9319, GNorm = 0.5971, lr_0 = 3.1512e-04
Loss = 9.8395e-02, PNorm = 79.9420, GNorm = 0.5410, lr_0 = 3.1491e-04
Loss = 1.0034e-01, PNorm = 79.9504, GNorm = 0.7219, lr_0 = 3.1469e-04
Loss = 9.9378e-02, PNorm = 79.9565, GNorm = 0.7970, lr_0 = 3.1448e-04
Loss = 9.1518e-02, PNorm = 79.9596, GNorm = 0.5261, lr_0 = 3.1426e-04
Loss = 1.0349e-01, PNorm = 79.9612, GNorm = 0.8005, lr_0 = 3.1405e-04
Loss = 9.5804e-02, PNorm = 79.9694, GNorm = 0.5706, lr_0 = 3.1383e-04
Loss = 9.4326e-02, PNorm = 79.9770, GNorm = 0.6749, lr_0 = 3.1362e-04
Loss = 1.1555e-01, PNorm = 79.9858, GNorm = 0.9812, lr_0 = 3.1340e-04
Loss = 9.9900e-02, PNorm = 79.9966, GNorm = 0.5299, lr_0 = 3.1319e-04
Loss = 9.3301e-02, PNorm = 80.0018, GNorm = 0.5330, lr_0 = 3.1297e-04
Loss = 9.9560e-02, PNorm = 80.0094, GNorm = 0.7798, lr_0 = 3.1276e-04
Loss = 1.0520e-01, PNorm = 80.0169, GNorm = 0.5719, lr_0 = 3.1254e-04
Loss = 1.0030e-01, PNorm = 80.0238, GNorm = 0.8636, lr_0 = 3.1233e-04
Loss = 1.0025e-01, PNorm = 80.0342, GNorm = 0.8562, lr_0 = 3.1212e-04
Loss = 1.1576e-01, PNorm = 80.0401, GNorm = 0.5517, lr_0 = 3.1190e-04
Loss = 9.8423e-02, PNorm = 80.0470, GNorm = 0.5338, lr_0 = 3.1169e-04
Loss = 1.0026e-01, PNorm = 80.0547, GNorm = 0.8269, lr_0 = 3.1147e-04
Loss = 9.3882e-02, PNorm = 80.0628, GNorm = 0.4938, lr_0 = 3.1126e-04
Loss = 9.4080e-02, PNorm = 80.0683, GNorm = 0.5025, lr_0 = 3.1105e-04
Loss = 9.5787e-02, PNorm = 80.0736, GNorm = 0.4723, lr_0 = 3.1083e-04
Loss = 1.0977e-01, PNorm = 80.0809, GNorm = 0.7249, lr_0 = 3.1062e-04
Loss = 1.0556e-01, PNorm = 80.0896, GNorm = 0.6103, lr_0 = 3.1041e-04
Loss = 1.0597e-01, PNorm = 80.1012, GNorm = 0.5881, lr_0 = 3.1020e-04
Loss = 1.0720e-01, PNorm = 80.1077, GNorm = 0.5649, lr_0 = 3.0998e-04
Loss = 9.5999e-02, PNorm = 80.1132, GNorm = 0.8797, lr_0 = 3.0977e-04
Loss = 9.9532e-02, PNorm = 80.1207, GNorm = 0.6194, lr_0 = 3.0956e-04
Loss = 1.0820e-01, PNorm = 80.1307, GNorm = 0.5488, lr_0 = 3.0935e-04
Loss = 9.9366e-02, PNorm = 80.1399, GNorm = 0.5608, lr_0 = 3.0914e-04
Loss = 1.0959e-01, PNorm = 80.1475, GNorm = 0.9591, lr_0 = 3.0892e-04
Loss = 1.0370e-01, PNorm = 80.1534, GNorm = 0.8030, lr_0 = 3.0871e-04
Loss = 9.9278e-02, PNorm = 80.1587, GNorm = 0.7044, lr_0 = 3.0850e-04
Loss = 1.0416e-01, PNorm = 80.1640, GNorm = 0.6824, lr_0 = 3.0829e-04
Loss = 1.0292e-01, PNorm = 80.1707, GNorm = 1.0000, lr_0 = 3.0808e-04
Loss = 1.1539e-01, PNorm = 80.1773, GNorm = 0.8230, lr_0 = 3.0787e-04
Loss = 1.1531e-01, PNorm = 80.1861, GNorm = 0.6986, lr_0 = 3.0766e-04
Loss = 9.9279e-02, PNorm = 80.1905, GNorm = 0.4056, lr_0 = 3.0745e-04
Loss = 9.6287e-02, PNorm = 80.1991, GNorm = 0.4787, lr_0 = 3.0723e-04
Loss = 9.9788e-02, PNorm = 80.2042, GNorm = 0.5561, lr_0 = 3.0702e-04
Loss = 9.7345e-02, PNorm = 80.2105, GNorm = 0.8953, lr_0 = 3.0681e-04
Loss = 1.0086e-01, PNorm = 80.2147, GNorm = 0.5255, lr_0 = 3.0660e-04
Loss = 8.5060e-02, PNorm = 80.2174, GNorm = 0.7408, lr_0 = 3.0639e-04
Loss = 1.0156e-01, PNorm = 80.2238, GNorm = 0.7374, lr_0 = 3.0618e-04
Loss = 9.8359e-02, PNorm = 80.2297, GNorm = 0.5527, lr_0 = 3.0597e-04
Loss = 1.0015e-01, PNorm = 80.2381, GNorm = 0.5290, lr_0 = 3.0576e-04
Loss = 1.0533e-01, PNorm = 80.2423, GNorm = 0.6663, lr_0 = 3.0555e-04
Loss = 1.0885e-01, PNorm = 80.2518, GNorm = 0.7230, lr_0 = 3.0535e-04
Loss = 1.0169e-01, PNorm = 80.2597, GNorm = 1.1315, lr_0 = 3.0514e-04
Loss = 1.0094e-01, PNorm = 80.2682, GNorm = 0.5950, lr_0 = 3.0493e-04
Loss = 1.1021e-01, PNorm = 80.2748, GNorm = 1.0679, lr_0 = 3.0472e-04
Loss = 9.9814e-02, PNorm = 80.2804, GNorm = 0.6393, lr_0 = 3.0451e-04
Loss = 9.4475e-02, PNorm = 80.2853, GNorm = 0.6203, lr_0 = 3.0430e-04
Loss = 1.0631e-01, PNorm = 80.2891, GNorm = 0.5753, lr_0 = 3.0409e-04
Loss = 1.0426e-01, PNorm = 80.2977, GNorm = 0.8326, lr_0 = 3.0388e-04
Loss = 9.8290e-02, PNorm = 80.3047, GNorm = 0.6374, lr_0 = 3.0368e-04
Loss = 1.1105e-01, PNorm = 80.3121, GNorm = 0.8945, lr_0 = 3.0347e-04
Loss = 9.9866e-02, PNorm = 80.3194, GNorm = 0.7459, lr_0 = 3.0326e-04
Loss = 1.2177e-01, PNorm = 80.3313, GNorm = 0.6690, lr_0 = 3.0305e-04
Loss = 9.1976e-02, PNorm = 80.3372, GNorm = 0.5224, lr_0 = 3.0284e-04
Loss = 1.1089e-01, PNorm = 80.3422, GNorm = 0.8018, lr_0 = 3.0264e-04
Loss = 9.3460e-02, PNorm = 80.3492, GNorm = 1.1935, lr_0 = 3.0243e-04
Loss = 1.0869e-01, PNorm = 80.3559, GNorm = 0.5804, lr_0 = 3.0222e-04
Loss = 9.6813e-02, PNorm = 80.3618, GNorm = 0.7256, lr_0 = 3.0202e-04
Loss = 1.2274e-01, PNorm = 80.3669, GNorm = 1.1494, lr_0 = 3.0181e-04
Loss = 1.1776e-01, PNorm = 80.3714, GNorm = 0.5494, lr_0 = 3.0160e-04
Loss = 1.0379e-01, PNorm = 80.3775, GNorm = 0.7397, lr_0 = 3.0140e-04
Loss = 9.9838e-02, PNorm = 80.3830, GNorm = 0.7305, lr_0 = 3.0119e-04
Loss = 9.9431e-02, PNorm = 80.3901, GNorm = 0.9701, lr_0 = 3.0098e-04
Loss = 1.1202e-01, PNorm = 80.3978, GNorm = 0.6031, lr_0 = 3.0078e-04
Loss = 9.9437e-02, PNorm = 80.4044, GNorm = 0.6542, lr_0 = 3.0057e-04
Loss = 9.1495e-02, PNorm = 80.4127, GNorm = 0.6308, lr_0 = 3.0036e-04
Loss = 8.8895e-02, PNorm = 80.4170, GNorm = 0.6811, lr_0 = 3.0016e-04
Loss = 1.0207e-01, PNorm = 80.4228, GNorm = 0.5703, lr_0 = 2.9995e-04
Loss = 9.7779e-02, PNorm = 80.4283, GNorm = 0.7466, lr_0 = 2.9975e-04
Loss = 9.6967e-02, PNorm = 80.4328, GNorm = 0.5460, lr_0 = 2.9954e-04
Loss = 1.0620e-01, PNorm = 80.4382, GNorm = 0.8024, lr_0 = 2.9934e-04
Loss = 1.1779e-01, PNorm = 80.4445, GNorm = 0.6939, lr_0 = 2.9913e-04
Loss = 1.0456e-01, PNorm = 80.4488, GNorm = 0.7773, lr_0 = 2.9893e-04
Loss = 1.0774e-01, PNorm = 80.4569, GNorm = 0.9508, lr_0 = 2.9872e-04
Loss = 1.0564e-01, PNorm = 80.4621, GNorm = 0.7130, lr_0 = 2.9852e-04
Loss = 1.0818e-01, PNorm = 80.4646, GNorm = 0.6909, lr_0 = 2.9831e-04
Loss = 9.6768e-02, PNorm = 80.4683, GNorm = 0.7220, lr_0 = 2.9811e-04
Loss = 1.0202e-01, PNorm = 80.4779, GNorm = 0.5284, lr_0 = 2.9790e-04
Loss = 1.1739e-01, PNorm = 80.4879, GNorm = 0.4903, lr_0 = 2.9770e-04
Loss = 9.0265e-02, PNorm = 80.4931, GNorm = 0.5883, lr_0 = 2.9750e-04
Loss = 1.0146e-01, PNorm = 80.4968, GNorm = 0.5675, lr_0 = 2.9729e-04
Loss = 1.0770e-01, PNorm = 80.4994, GNorm = 0.6271, lr_0 = 2.9709e-04
Loss = 1.0249e-01, PNorm = 80.5058, GNorm = 0.9344, lr_0 = 2.9689e-04
Loss = 1.0736e-01, PNorm = 80.5140, GNorm = 0.6870, lr_0 = 2.9668e-04
Loss = 1.2164e-01, PNorm = 80.5220, GNorm = 0.8817, lr_0 = 2.9648e-04
Loss = 1.1772e-01, PNorm = 80.5270, GNorm = 0.9369, lr_0 = 2.9628e-04
Loss = 1.0886e-01, PNorm = 80.5305, GNorm = 0.6087, lr_0 = 2.9607e-04
Loss = 1.0869e-01, PNorm = 80.5353, GNorm = 0.8947, lr_0 = 2.9587e-04
Loss = 1.0340e-01, PNorm = 80.5407, GNorm = 0.6602, lr_0 = 2.9567e-04
Loss = 1.0539e-01, PNorm = 80.5457, GNorm = 0.6227, lr_0 = 2.9546e-04
Loss = 9.2831e-02, PNorm = 80.5525, GNorm = 0.5829, lr_0 = 2.9526e-04
Loss = 1.0115e-01, PNorm = 80.5562, GNorm = 0.6037, lr_0 = 2.9506e-04
Loss = 9.5259e-02, PNorm = 80.5615, GNorm = 0.6629, lr_0 = 2.9486e-04
Loss = 1.0393e-01, PNorm = 80.5688, GNorm = 0.6368, lr_0 = 2.9466e-04
Loss = 1.0571e-01, PNorm = 80.5742, GNorm = 0.6589, lr_0 = 2.9445e-04
Loss = 1.2026e-01, PNorm = 80.5765, GNorm = 0.6785, lr_0 = 2.9425e-04
Loss = 1.0650e-01, PNorm = 80.5839, GNorm = 0.7332, lr_0 = 2.9405e-04
Loss = 9.8125e-02, PNorm = 80.5889, GNorm = 0.9058, lr_0 = 2.9385e-04
Loss = 1.0464e-01, PNorm = 80.5914, GNorm = 0.7506, lr_0 = 2.9365e-04
Loss = 1.0020e-01, PNorm = 80.6001, GNorm = 0.7900, lr_0 = 2.9345e-04
Loss = 1.0736e-01, PNorm = 80.6051, GNorm = 0.5729, lr_0 = 2.9325e-04
Loss = 1.1962e-01, PNorm = 80.6105, GNorm = 0.6971, lr_0 = 2.9305e-04
Loss = 1.1842e-01, PNorm = 80.6151, GNorm = 0.7431, lr_0 = 2.9284e-04
Loss = 1.0524e-01, PNorm = 80.6198, GNorm = 0.6567, lr_0 = 2.9264e-04
Loss = 1.1019e-01, PNorm = 80.6261, GNorm = 0.4887, lr_0 = 2.9244e-04
Loss = 1.0979e-01, PNorm = 80.6358, GNorm = 0.5132, lr_0 = 2.9224e-04
Loss = 1.0323e-01, PNorm = 80.6442, GNorm = 0.7569, lr_0 = 2.9204e-04
Loss = 1.0350e-01, PNorm = 80.6520, GNorm = 0.5948, lr_0 = 2.9184e-04
Loss = 1.0502e-01, PNorm = 80.6543, GNorm = 0.7834, lr_0 = 2.9164e-04
Loss = 9.9249e-02, PNorm = 80.6605, GNorm = 0.7164, lr_0 = 2.9144e-04
Loss = 1.1971e-01, PNorm = 80.6655, GNorm = 0.9121, lr_0 = 2.9124e-04
Validation mae = 0.228759
Epoch 17
Loss = 8.9679e-02, PNorm = 80.6713, GNorm = 0.6589, lr_0 = 2.9104e-04
Loss = 1.0141e-01, PNorm = 80.6737, GNorm = 0.9354, lr_0 = 2.9084e-04
Loss = 1.1120e-01, PNorm = 80.6774, GNorm = 0.6366, lr_0 = 2.9065e-04
Loss = 9.2273e-02, PNorm = 80.6839, GNorm = 0.5557, lr_0 = 2.9045e-04
Loss = 9.8849e-02, PNorm = 80.6905, GNorm = 0.6185, lr_0 = 2.9025e-04
Loss = 1.0654e-01, PNorm = 80.6980, GNorm = 1.0059, lr_0 = 2.9005e-04
Loss = 1.0404e-01, PNorm = 80.7078, GNorm = 0.5929, lr_0 = 2.8985e-04
Loss = 9.5647e-02, PNorm = 80.7162, GNorm = 0.6174, lr_0 = 2.8965e-04
Loss = 1.0537e-01, PNorm = 80.7227, GNorm = 0.6339, lr_0 = 2.8945e-04
Loss = 9.5179e-02, PNorm = 80.7266, GNorm = 0.5377, lr_0 = 2.8925e-04
Loss = 1.0357e-01, PNorm = 80.7306, GNorm = 0.6408, lr_0 = 2.8906e-04
Loss = 9.2169e-02, PNorm = 80.7383, GNorm = 0.5685, lr_0 = 2.8886e-04
Loss = 1.0816e-01, PNorm = 80.7431, GNorm = 1.0336, lr_0 = 2.8866e-04
Loss = 9.6043e-02, PNorm = 80.7458, GNorm = 0.6390, lr_0 = 2.8846e-04
Loss = 1.0675e-01, PNorm = 80.7563, GNorm = 0.5753, lr_0 = 2.8826e-04
Loss = 9.8749e-02, PNorm = 80.7653, GNorm = 0.7016, lr_0 = 2.8807e-04
Loss = 1.0181e-01, PNorm = 80.7716, GNorm = 0.9352, lr_0 = 2.8787e-04
Loss = 8.9496e-02, PNorm = 80.7820, GNorm = 0.7516, lr_0 = 2.8767e-04
Loss = 9.7654e-02, PNorm = 80.7925, GNorm = 0.5701, lr_0 = 2.8748e-04
Loss = 1.0384e-01, PNorm = 80.7976, GNorm = 0.9008, lr_0 = 2.8728e-04
Loss = 1.0560e-01, PNorm = 80.8024, GNorm = 0.6572, lr_0 = 2.8708e-04
Loss = 9.4409e-02, PNorm = 80.8094, GNorm = 0.6454, lr_0 = 2.8689e-04
Loss = 9.9752e-02, PNorm = 80.8158, GNorm = 0.7162, lr_0 = 2.8669e-04
Loss = 9.7351e-02, PNorm = 80.8202, GNorm = 1.0478, lr_0 = 2.8649e-04
Loss = 1.0138e-01, PNorm = 80.8279, GNorm = 0.5606, lr_0 = 2.8630e-04
Loss = 9.3270e-02, PNorm = 80.8353, GNorm = 0.5654, lr_0 = 2.8610e-04
Loss = 9.1310e-02, PNorm = 80.8396, GNorm = 0.5834, lr_0 = 2.8590e-04
Loss = 1.0394e-01, PNorm = 80.8437, GNorm = 0.7432, lr_0 = 2.8571e-04
Loss = 8.8636e-02, PNorm = 80.8495, GNorm = 0.7905, lr_0 = 2.8551e-04
Loss = 9.8888e-02, PNorm = 80.8536, GNorm = 0.6642, lr_0 = 2.8532e-04
Loss = 9.8239e-02, PNorm = 80.8573, GNorm = 0.7354, lr_0 = 2.8512e-04
Loss = 9.7466e-02, PNorm = 80.8627, GNorm = 0.9108, lr_0 = 2.8493e-04
Loss = 9.3317e-02, PNorm = 80.8688, GNorm = 0.7589, lr_0 = 2.8473e-04
Loss = 1.0297e-01, PNorm = 80.8776, GNorm = 0.5737, lr_0 = 2.8454e-04
Loss = 1.0262e-01, PNorm = 80.8802, GNorm = 0.5435, lr_0 = 2.8434e-04
Loss = 1.0257e-01, PNorm = 80.8883, GNorm = 0.6533, lr_0 = 2.8415e-04
Loss = 8.6430e-02, PNorm = 80.8937, GNorm = 0.4979, lr_0 = 2.8395e-04
Loss = 1.0220e-01, PNorm = 80.8990, GNorm = 0.6004, lr_0 = 2.8376e-04
Loss = 9.6760e-02, PNorm = 80.9063, GNorm = 0.4825, lr_0 = 2.8356e-04
Loss = 1.0134e-01, PNorm = 80.9129, GNorm = 0.6458, lr_0 = 2.8337e-04
Loss = 9.6762e-02, PNorm = 80.9152, GNorm = 0.9324, lr_0 = 2.8317e-04
Loss = 1.0629e-01, PNorm = 80.9176, GNorm = 1.2459, lr_0 = 2.8298e-04
Loss = 9.4777e-02, PNorm = 80.9237, GNorm = 0.5114, lr_0 = 2.8279e-04
Loss = 1.0566e-01, PNorm = 80.9303, GNorm = 0.6620, lr_0 = 2.8259e-04
Loss = 9.5136e-02, PNorm = 80.9378, GNorm = 0.6195, lr_0 = 2.8240e-04
Loss = 1.1111e-01, PNorm = 80.9467, GNorm = 0.9180, lr_0 = 2.8221e-04
Loss = 9.6738e-02, PNorm = 80.9521, GNorm = 0.7276, lr_0 = 2.8201e-04
Loss = 9.7114e-02, PNorm = 80.9566, GNorm = 0.7433, lr_0 = 2.8182e-04
Loss = 9.4943e-02, PNorm = 80.9620, GNorm = 0.8183, lr_0 = 2.8163e-04
Loss = 1.0814e-01, PNorm = 80.9651, GNorm = 0.6944, lr_0 = 2.8143e-04
Loss = 9.0108e-02, PNorm = 80.9740, GNorm = 0.6175, lr_0 = 2.8124e-04
Loss = 9.7694e-02, PNorm = 80.9797, GNorm = 0.6894, lr_0 = 2.8105e-04
Loss = 9.8519e-02, PNorm = 80.9866, GNorm = 0.6727, lr_0 = 2.8085e-04
Loss = 1.0866e-01, PNorm = 80.9931, GNorm = 0.4345, lr_0 = 2.8066e-04
Loss = 1.1124e-01, PNorm = 80.9983, GNorm = 0.8243, lr_0 = 2.8047e-04
Loss = 9.3727e-02, PNorm = 81.0015, GNorm = 0.8112, lr_0 = 2.8028e-04
Loss = 1.0480e-01, PNorm = 81.0064, GNorm = 0.6304, lr_0 = 2.8009e-04
Loss = 8.9964e-02, PNorm = 81.0117, GNorm = 0.6004, lr_0 = 2.7989e-04
Loss = 1.0806e-01, PNorm = 81.0184, GNorm = 0.5393, lr_0 = 2.7970e-04
Loss = 8.4641e-02, PNorm = 81.0233, GNorm = 0.6094, lr_0 = 2.7951e-04
Loss = 1.0236e-01, PNorm = 81.0284, GNorm = 0.7940, lr_0 = 2.7932e-04
Loss = 9.3848e-02, PNorm = 81.0326, GNorm = 0.6615, lr_0 = 2.7913e-04
Loss = 9.9535e-02, PNorm = 81.0348, GNorm = 0.5643, lr_0 = 2.7894e-04
Loss = 9.6807e-02, PNorm = 81.0388, GNorm = 0.5346, lr_0 = 2.7875e-04
Loss = 1.0868e-01, PNorm = 81.0439, GNorm = 0.6523, lr_0 = 2.7855e-04
Loss = 1.0447e-01, PNorm = 81.0500, GNorm = 0.6261, lr_0 = 2.7836e-04
Loss = 9.7781e-02, PNorm = 81.0522, GNorm = 0.6091, lr_0 = 2.7817e-04
Loss = 1.0571e-01, PNorm = 81.0571, GNorm = 0.5212, lr_0 = 2.7798e-04
Loss = 9.8831e-02, PNorm = 81.0623, GNorm = 0.4775, lr_0 = 2.7779e-04
Loss = 9.5291e-02, PNorm = 81.0645, GNorm = 0.7828, lr_0 = 2.7760e-04
Loss = 1.0375e-01, PNorm = 81.0695, GNorm = 0.5073, lr_0 = 2.7741e-04
Loss = 9.7988e-02, PNorm = 81.0758, GNorm = 0.6070, lr_0 = 2.7722e-04
Loss = 1.1707e-01, PNorm = 81.0829, GNorm = 0.6034, lr_0 = 2.7703e-04
Loss = 1.1001e-01, PNorm = 81.0925, GNorm = 0.6685, lr_0 = 2.7684e-04
Loss = 9.9581e-02, PNorm = 81.1023, GNorm = 0.9987, lr_0 = 2.7665e-04
Loss = 1.0896e-01, PNorm = 81.1108, GNorm = 0.6708, lr_0 = 2.7646e-04
Loss = 9.7320e-02, PNorm = 81.1206, GNorm = 0.5320, lr_0 = 2.7627e-04
Loss = 1.1790e-01, PNorm = 81.1278, GNorm = 0.6739, lr_0 = 2.7608e-04
Loss = 1.0367e-01, PNorm = 81.1344, GNorm = 0.7474, lr_0 = 2.7590e-04
Loss = 9.3291e-02, PNorm = 81.1413, GNorm = 0.6816, lr_0 = 2.7571e-04
Loss = 9.6809e-02, PNorm = 81.1443, GNorm = 0.8855, lr_0 = 2.7552e-04
Loss = 8.6255e-02, PNorm = 81.1468, GNorm = 0.6912, lr_0 = 2.7533e-04
Loss = 1.0157e-01, PNorm = 81.1516, GNorm = 0.6635, lr_0 = 2.7514e-04
Loss = 1.0078e-01, PNorm = 81.1547, GNorm = 0.4775, lr_0 = 2.7495e-04
Loss = 8.8635e-02, PNorm = 81.1636, GNorm = 0.8263, lr_0 = 2.7476e-04
Loss = 1.0019e-01, PNorm = 81.1683, GNorm = 0.7523, lr_0 = 2.7457e-04
Loss = 1.0619e-01, PNorm = 81.1786, GNorm = 0.7786, lr_0 = 2.7439e-04
Loss = 9.6725e-02, PNorm = 81.1815, GNorm = 0.7265, lr_0 = 2.7420e-04
Loss = 9.2644e-02, PNorm = 81.1837, GNorm = 0.5698, lr_0 = 2.7401e-04
Loss = 9.8082e-02, PNorm = 81.1890, GNorm = 0.7135, lr_0 = 2.7382e-04
Loss = 1.2287e-01, PNorm = 81.1951, GNorm = 0.9053, lr_0 = 2.7364e-04
Loss = 9.9264e-02, PNorm = 81.2016, GNorm = 0.8921, lr_0 = 2.7345e-04
Loss = 1.0668e-01, PNorm = 81.2039, GNorm = 0.7176, lr_0 = 2.7326e-04
Loss = 1.1869e-01, PNorm = 81.2103, GNorm = 0.9301, lr_0 = 2.7307e-04
Loss = 9.1359e-02, PNorm = 81.2148, GNorm = 0.5837, lr_0 = 2.7289e-04
Loss = 1.0472e-01, PNorm = 81.2200, GNorm = 0.7698, lr_0 = 2.7270e-04
Loss = 9.7218e-02, PNorm = 81.2232, GNorm = 0.7853, lr_0 = 2.7251e-04
Loss = 1.0977e-01, PNorm = 81.2287, GNorm = 0.7296, lr_0 = 2.7233e-04
Loss = 9.7321e-02, PNorm = 81.2329, GNorm = 0.7608, lr_0 = 2.7214e-04
Loss = 9.5313e-02, PNorm = 81.2390, GNorm = 0.8951, lr_0 = 2.7195e-04
Loss = 1.1506e-01, PNorm = 81.2478, GNorm = 0.6925, lr_0 = 2.7177e-04
Loss = 9.9621e-02, PNorm = 81.2508, GNorm = 0.5892, lr_0 = 2.7158e-04
Loss = 1.2216e-01, PNorm = 81.2544, GNorm = 0.7735, lr_0 = 2.7139e-04
Loss = 9.7080e-02, PNorm = 81.2572, GNorm = 0.5664, lr_0 = 2.7121e-04
Loss = 9.5148e-02, PNorm = 81.2601, GNorm = 0.5260, lr_0 = 2.7102e-04
Loss = 1.0043e-01, PNorm = 81.2630, GNorm = 0.5207, lr_0 = 2.7084e-04
Loss = 1.0319e-01, PNorm = 81.2687, GNorm = 0.6130, lr_0 = 2.7065e-04
Loss = 1.0839e-01, PNorm = 81.2738, GNorm = 0.7784, lr_0 = 2.7047e-04
Loss = 1.1805e-01, PNorm = 81.2804, GNorm = 0.7797, lr_0 = 2.7028e-04
Loss = 1.0170e-01, PNorm = 81.2885, GNorm = 0.5696, lr_0 = 2.7010e-04
Loss = 1.0540e-01, PNorm = 81.2920, GNorm = 1.0701, lr_0 = 2.6991e-04
Loss = 1.1264e-01, PNorm = 81.2960, GNorm = 1.0388, lr_0 = 2.6973e-04
Loss = 9.3799e-02, PNorm = 81.3021, GNorm = 0.5970, lr_0 = 2.6954e-04
Loss = 8.6212e-02, PNorm = 81.3069, GNorm = 0.7064, lr_0 = 2.6936e-04
Loss = 9.5380e-02, PNorm = 81.3129, GNorm = 0.6165, lr_0 = 2.6917e-04
Loss = 1.0182e-01, PNorm = 81.3165, GNorm = 0.6984, lr_0 = 2.6899e-04
Loss = 9.6544e-02, PNorm = 81.3185, GNorm = 0.5316, lr_0 = 2.6880e-04
Loss = 9.8530e-02, PNorm = 81.3253, GNorm = 0.7143, lr_0 = 2.6862e-04
Loss = 9.5904e-02, PNorm = 81.3284, GNorm = 0.4354, lr_0 = 2.6844e-04
Loss = 1.0270e-01, PNorm = 81.3333, GNorm = 0.5148, lr_0 = 2.6825e-04
Validation mae = 0.227808
Epoch 18
Loss = 8.4720e-02, PNorm = 81.3384, GNorm = 0.5542, lr_0 = 2.6807e-04
Loss = 9.0005e-02, PNorm = 81.3459, GNorm = 0.5689, lr_0 = 2.6788e-04
Loss = 8.0978e-02, PNorm = 81.3541, GNorm = 0.6088, lr_0 = 2.6770e-04
Loss = 9.1588e-02, PNorm = 81.3618, GNorm = 0.9449, lr_0 = 2.6752e-04
Loss = 9.2404e-02, PNorm = 81.3665, GNorm = 0.6248, lr_0 = 2.6733e-04
Loss = 9.8568e-02, PNorm = 81.3730, GNorm = 0.9281, lr_0 = 2.6715e-04
Loss = 9.1225e-02, PNorm = 81.3800, GNorm = 0.7057, lr_0 = 2.6697e-04
Loss = 9.8894e-02, PNorm = 81.3860, GNorm = 0.5026, lr_0 = 2.6678e-04
Loss = 8.9558e-02, PNorm = 81.3941, GNorm = 0.4982, lr_0 = 2.6660e-04
Loss = 8.7414e-02, PNorm = 81.4025, GNorm = 0.9102, lr_0 = 2.6642e-04
Loss = 1.0101e-01, PNorm = 81.4073, GNorm = 0.6616, lr_0 = 2.6624e-04
Loss = 1.0415e-01, PNorm = 81.4125, GNorm = 0.5311, lr_0 = 2.6605e-04
Loss = 8.5635e-02, PNorm = 81.4173, GNorm = 0.7362, lr_0 = 2.6587e-04
Loss = 9.6211e-02, PNorm = 81.4207, GNorm = 0.6698, lr_0 = 2.6569e-04
Loss = 9.4365e-02, PNorm = 81.4270, GNorm = 0.6156, lr_0 = 2.6551e-04
Loss = 9.9737e-02, PNorm = 81.4314, GNorm = 0.7848, lr_0 = 2.6533e-04
Loss = 9.7707e-02, PNorm = 81.4352, GNorm = 0.8804, lr_0 = 2.6514e-04
Loss = 9.3134e-02, PNorm = 81.4412, GNorm = 0.7664, lr_0 = 2.6496e-04
Loss = 9.3495e-02, PNorm = 81.4471, GNorm = 1.0260, lr_0 = 2.6478e-04
Loss = 9.1278e-02, PNorm = 81.4509, GNorm = 0.5608, lr_0 = 2.6460e-04
Loss = 1.0212e-01, PNorm = 81.4562, GNorm = 0.9564, lr_0 = 2.6442e-04
Loss = 1.0107e-01, PNorm = 81.4645, GNorm = 0.5743, lr_0 = 2.6424e-04
Loss = 9.2840e-02, PNorm = 81.4699, GNorm = 0.8611, lr_0 = 2.6406e-04
Loss = 9.0910e-02, PNorm = 81.4759, GNorm = 0.6289, lr_0 = 2.6388e-04
Loss = 9.3126e-02, PNorm = 81.4813, GNorm = 0.7293, lr_0 = 2.6369e-04
Loss = 8.5110e-02, PNorm = 81.4836, GNorm = 0.5141, lr_0 = 2.6351e-04
Loss = 9.3066e-02, PNorm = 81.4888, GNorm = 0.7438, lr_0 = 2.6333e-04
Loss = 9.1593e-02, PNorm = 81.4959, GNorm = 0.7104, lr_0 = 2.6315e-04
Loss = 1.0229e-01, PNorm = 81.5027, GNorm = 0.6402, lr_0 = 2.6297e-04
Loss = 8.1655e-02, PNorm = 81.5091, GNorm = 0.5088, lr_0 = 2.6279e-04
Loss = 1.0224e-01, PNorm = 81.5098, GNorm = 0.5901, lr_0 = 2.6261e-04
Loss = 9.7378e-02, PNorm = 81.5126, GNorm = 0.7035, lr_0 = 2.6243e-04
Loss = 8.4721e-02, PNorm = 81.5177, GNorm = 0.6655, lr_0 = 2.6225e-04
Loss = 1.1010e-01, PNorm = 81.5204, GNorm = 0.7454, lr_0 = 2.6207e-04
Loss = 9.8065e-02, PNorm = 81.5227, GNorm = 0.6025, lr_0 = 2.6189e-04
Loss = 9.9474e-02, PNorm = 81.5305, GNorm = 0.6416, lr_0 = 2.6171e-04
Loss = 1.0395e-01, PNorm = 81.5320, GNorm = 0.9374, lr_0 = 2.6153e-04
Loss = 9.5212e-02, PNorm = 81.5379, GNorm = 0.5810, lr_0 = 2.6136e-04
Loss = 1.0809e-01, PNorm = 81.5450, GNorm = 0.7201, lr_0 = 2.6118e-04
Loss = 1.0737e-01, PNorm = 81.5520, GNorm = 0.7028, lr_0 = 2.6100e-04
Loss = 9.4565e-02, PNorm = 81.5588, GNorm = 0.7899, lr_0 = 2.6082e-04
Loss = 9.2826e-02, PNorm = 81.5651, GNorm = 0.6102, lr_0 = 2.6064e-04
Loss = 1.0539e-01, PNorm = 81.5712, GNorm = 0.7309, lr_0 = 2.6046e-04
Loss = 8.8645e-02, PNorm = 81.5751, GNorm = 0.5522, lr_0 = 2.6028e-04
Loss = 9.0182e-02, PNorm = 81.5807, GNorm = 0.5566, lr_0 = 2.6011e-04
Loss = 9.4054e-02, PNorm = 81.5875, GNorm = 0.7380, lr_0 = 2.5993e-04
Loss = 9.4596e-02, PNorm = 81.5934, GNorm = 0.6790, lr_0 = 2.5975e-04
Loss = 8.8482e-02, PNorm = 81.5965, GNorm = 0.4857, lr_0 = 2.5957e-04
Loss = 8.6327e-02, PNorm = 81.5990, GNorm = 0.6369, lr_0 = 2.5939e-04
Loss = 1.0159e-01, PNorm = 81.6036, GNorm = 1.1497, lr_0 = 2.5922e-04
Loss = 9.4398e-02, PNorm = 81.6091, GNorm = 0.4770, lr_0 = 2.5904e-04
Loss = 9.4509e-02, PNorm = 81.6128, GNorm = 0.6349, lr_0 = 2.5886e-04
Loss = 1.0086e-01, PNorm = 81.6187, GNorm = 0.7464, lr_0 = 2.5868e-04
Loss = 9.2156e-02, PNorm = 81.6241, GNorm = 0.6654, lr_0 = 2.5851e-04
Loss = 9.4699e-02, PNorm = 81.6267, GNorm = 0.6140, lr_0 = 2.5833e-04
Loss = 1.0973e-01, PNorm = 81.6312, GNorm = 0.6579, lr_0 = 2.5815e-04
Loss = 9.4217e-02, PNorm = 81.6361, GNorm = 0.4531, lr_0 = 2.5797e-04
Loss = 9.9291e-02, PNorm = 81.6404, GNorm = 0.9843, lr_0 = 2.5780e-04
Loss = 9.4998e-02, PNorm = 81.6457, GNorm = 0.8063, lr_0 = 2.5762e-04
Loss = 1.0112e-01, PNorm = 81.6490, GNorm = 0.6921, lr_0 = 2.5745e-04
Loss = 9.5076e-02, PNorm = 81.6528, GNorm = 0.6957, lr_0 = 2.5727e-04
Loss = 9.5539e-02, PNorm = 81.6591, GNorm = 0.5814, lr_0 = 2.5709e-04
Loss = 9.9421e-02, PNorm = 81.6635, GNorm = 0.5414, lr_0 = 2.5692e-04
Loss = 1.0869e-01, PNorm = 81.6733, GNorm = 0.7899, lr_0 = 2.5674e-04
Loss = 1.0038e-01, PNorm = 81.6817, GNorm = 0.9225, lr_0 = 2.5656e-04
Loss = 8.8821e-02, PNorm = 81.6875, GNorm = 0.6385, lr_0 = 2.5639e-04
Loss = 8.1497e-02, PNorm = 81.6902, GNorm = 0.7714, lr_0 = 2.5621e-04
Loss = 9.8513e-02, PNorm = 81.6920, GNorm = 0.5841, lr_0 = 2.5604e-04
Loss = 1.0946e-01, PNorm = 81.6981, GNorm = 0.7157, lr_0 = 2.5586e-04
Loss = 1.1307e-01, PNorm = 81.7026, GNorm = 0.8512, lr_0 = 2.5569e-04
Loss = 7.6751e-02, PNorm = 81.7059, GNorm = 0.6179, lr_0 = 2.5551e-04
Loss = 1.0420e-01, PNorm = 81.7090, GNorm = 0.9622, lr_0 = 2.5534e-04
Loss = 9.1134e-02, PNorm = 81.7166, GNorm = 0.5610, lr_0 = 2.5516e-04
Loss = 1.0925e-01, PNorm = 81.7255, GNorm = 1.7570, lr_0 = 2.5499e-04
Loss = 1.0664e-01, PNorm = 81.7327, GNorm = 0.5351, lr_0 = 2.5481e-04
Loss = 1.0189e-01, PNorm = 81.7397, GNorm = 0.5554, lr_0 = 2.5464e-04
Loss = 1.0177e-01, PNorm = 81.7429, GNorm = 0.7977, lr_0 = 2.5446e-04
Loss = 1.0095e-01, PNorm = 81.7489, GNorm = 0.7690, lr_0 = 2.5429e-04
Loss = 1.0353e-01, PNorm = 81.7541, GNorm = 0.7042, lr_0 = 2.5411e-04
Loss = 1.0007e-01, PNorm = 81.7544, GNorm = 0.6781, lr_0 = 2.5394e-04
Loss = 1.0198e-01, PNorm = 81.7585, GNorm = 0.8182, lr_0 = 2.5377e-04
Loss = 9.7036e-02, PNorm = 81.7660, GNorm = 0.9534, lr_0 = 2.5359e-04
Loss = 1.0527e-01, PNorm = 81.7709, GNorm = 0.8512, lr_0 = 2.5342e-04
Loss = 9.2355e-02, PNorm = 81.7749, GNorm = 0.9117, lr_0 = 2.5325e-04
Loss = 1.0413e-01, PNorm = 81.7797, GNorm = 0.5537, lr_0 = 2.5307e-04
Loss = 1.0004e-01, PNorm = 81.7832, GNorm = 0.8270, lr_0 = 2.5290e-04
Loss = 9.0624e-02, PNorm = 81.7834, GNorm = 0.8842, lr_0 = 2.5273e-04
Loss = 1.0319e-01, PNorm = 81.7885, GNorm = 0.7275, lr_0 = 2.5255e-04
Loss = 9.9911e-02, PNorm = 81.7979, GNorm = 0.7565, lr_0 = 2.5238e-04
Loss = 1.0194e-01, PNorm = 81.8038, GNorm = 0.8008, lr_0 = 2.5221e-04
Loss = 9.4572e-02, PNorm = 81.8084, GNorm = 0.5883, lr_0 = 2.5203e-04
Loss = 9.3856e-02, PNorm = 81.8148, GNorm = 0.8336, lr_0 = 2.5186e-04
Loss = 1.1024e-01, PNorm = 81.8162, GNorm = 0.6750, lr_0 = 2.5169e-04
Loss = 9.5286e-02, PNorm = 81.8217, GNorm = 0.6023, lr_0 = 2.5152e-04
Loss = 9.3860e-02, PNorm = 81.8261, GNorm = 0.7819, lr_0 = 2.5134e-04
Loss = 9.6586e-02, PNorm = 81.8305, GNorm = 0.8126, lr_0 = 2.5117e-04
Loss = 1.0763e-01, PNorm = 81.8397, GNorm = 0.6030, lr_0 = 2.5100e-04
Loss = 9.3950e-02, PNorm = 81.8462, GNorm = 0.6980, lr_0 = 2.5083e-04
Loss = 8.9599e-02, PNorm = 81.8489, GNorm = 0.4854, lr_0 = 2.5066e-04
Loss = 9.2268e-02, PNorm = 81.8523, GNorm = 0.4670, lr_0 = 2.5048e-04
Loss = 1.0377e-01, PNorm = 81.8560, GNorm = 0.7879, lr_0 = 2.5031e-04
Loss = 1.0235e-01, PNorm = 81.8621, GNorm = 0.7371, lr_0 = 2.5014e-04
Loss = 9.2886e-02, PNorm = 81.8687, GNorm = 0.8226, lr_0 = 2.4997e-04
Loss = 9.3528e-02, PNorm = 81.8712, GNorm = 0.5266, lr_0 = 2.4980e-04
Loss = 9.3620e-02, PNorm = 81.8756, GNorm = 0.6620, lr_0 = 2.4963e-04
Loss = 9.5341e-02, PNorm = 81.8815, GNorm = 0.5266, lr_0 = 2.4946e-04
Loss = 9.1750e-02, PNorm = 81.8857, GNorm = 0.8323, lr_0 = 2.4929e-04
Loss = 9.4281e-02, PNorm = 81.8888, GNorm = 0.6664, lr_0 = 2.4911e-04
Loss = 1.0242e-01, PNorm = 81.8955, GNorm = 0.9017, lr_0 = 2.4894e-04
Loss = 1.0510e-01, PNorm = 81.9023, GNorm = 0.5884, lr_0 = 2.4877e-04
Loss = 1.0816e-01, PNorm = 81.9086, GNorm = 0.8636, lr_0 = 2.4860e-04
Loss = 8.8205e-02, PNorm = 81.9116, GNorm = 0.5645, lr_0 = 2.4843e-04
Loss = 1.0268e-01, PNorm = 81.9145, GNorm = 0.8086, lr_0 = 2.4826e-04
Loss = 1.1328e-01, PNorm = 81.9170, GNorm = 0.8407, lr_0 = 2.4809e-04
Loss = 9.8812e-02, PNorm = 81.9158, GNorm = 0.6635, lr_0 = 2.4792e-04
Loss = 9.8926e-02, PNorm = 81.9203, GNorm = 0.5111, lr_0 = 2.4775e-04
Loss = 9.4640e-02, PNorm = 81.9253, GNorm = 0.5907, lr_0 = 2.4758e-04
Loss = 9.5260e-02, PNorm = 81.9314, GNorm = 0.5607, lr_0 = 2.4741e-04
Loss = 1.0078e-01, PNorm = 81.9364, GNorm = 0.9014, lr_0 = 2.4724e-04
Loss = 1.0143e-01, PNorm = 81.9391, GNorm = 0.6546, lr_0 = 2.4707e-04
Validation mae = 0.228928
Epoch 19
Loss = 1.0122e-01, PNorm = 81.9442, GNorm = 0.5260, lr_0 = 2.4690e-04
Loss = 1.0293e-01, PNorm = 81.9509, GNorm = 0.6852, lr_0 = 2.4674e-04
Loss = 8.7341e-02, PNorm = 81.9575, GNorm = 0.5459, lr_0 = 2.4657e-04
Loss = 7.9030e-02, PNorm = 81.9641, GNorm = 0.5581, lr_0 = 2.4640e-04
Loss = 9.5332e-02, PNorm = 81.9701, GNorm = 0.7409, lr_0 = 2.4623e-04
Loss = 8.0677e-02, PNorm = 81.9764, GNorm = 0.5355, lr_0 = 2.4606e-04
Loss = 1.0213e-01, PNorm = 81.9828, GNorm = 1.0000, lr_0 = 2.4589e-04
Loss = 8.1125e-02, PNorm = 81.9868, GNorm = 0.7063, lr_0 = 2.4572e-04
Loss = 9.2129e-02, PNorm = 81.9896, GNorm = 0.6585, lr_0 = 2.4556e-04
Loss = 9.1590e-02, PNorm = 81.9935, GNorm = 0.6490, lr_0 = 2.4539e-04
Loss = 8.5709e-02, PNorm = 81.9990, GNorm = 0.8326, lr_0 = 2.4522e-04
Loss = 8.3536e-02, PNorm = 82.0016, GNorm = 1.0316, lr_0 = 2.4505e-04
Loss = 7.8901e-02, PNorm = 82.0063, GNorm = 0.6218, lr_0 = 2.4488e-04
Loss = 8.1009e-02, PNorm = 82.0093, GNorm = 0.7551, lr_0 = 2.4472e-04
Loss = 8.1164e-02, PNorm = 82.0112, GNorm = 0.5205, lr_0 = 2.4455e-04
Loss = 9.3658e-02, PNorm = 82.0160, GNorm = 1.1006, lr_0 = 2.4438e-04
Loss = 8.6036e-02, PNorm = 82.0196, GNorm = 0.6399, lr_0 = 2.4421e-04
Loss = 8.2128e-02, PNorm = 82.0225, GNorm = 0.5707, lr_0 = 2.4405e-04
Loss = 8.5644e-02, PNorm = 82.0284, GNorm = 0.5750, lr_0 = 2.4388e-04
Loss = 9.2869e-02, PNorm = 82.0347, GNorm = 0.6166, lr_0 = 2.4371e-04
Loss = 9.1994e-02, PNorm = 82.0370, GNorm = 0.6175, lr_0 = 2.4354e-04
Loss = 8.3539e-02, PNorm = 82.0406, GNorm = 0.6953, lr_0 = 2.4338e-04
Loss = 9.4155e-02, PNorm = 82.0458, GNorm = 0.6177, lr_0 = 2.4321e-04
Loss = 9.6487e-02, PNorm = 82.0497, GNorm = 0.8654, lr_0 = 2.4304e-04
Loss = 1.0212e-01, PNorm = 82.0550, GNorm = 0.7134, lr_0 = 2.4288e-04
Loss = 9.1233e-02, PNorm = 82.0599, GNorm = 0.7174, lr_0 = 2.4271e-04
Loss = 8.5426e-02, PNorm = 82.0645, GNorm = 0.8721, lr_0 = 2.4254e-04
Loss = 1.0271e-01, PNorm = 82.0704, GNorm = 0.7628, lr_0 = 2.4238e-04
Loss = 9.5018e-02, PNorm = 82.0770, GNorm = 0.7005, lr_0 = 2.4221e-04
Loss = 1.0261e-01, PNorm = 82.0778, GNorm = 0.7082, lr_0 = 2.4205e-04
Loss = 9.3810e-02, PNorm = 82.0818, GNorm = 0.4808, lr_0 = 2.4188e-04
Loss = 1.0137e-01, PNorm = 82.0869, GNorm = 0.5473, lr_0 = 2.4171e-04
Loss = 9.3446e-02, PNorm = 82.0906, GNorm = 0.6787, lr_0 = 2.4155e-04
Loss = 9.7678e-02, PNorm = 82.0962, GNorm = 0.6834, lr_0 = 2.4138e-04
Loss = 8.9493e-02, PNorm = 82.1015, GNorm = 0.9944, lr_0 = 2.4122e-04
Loss = 9.4117e-02, PNorm = 82.1089, GNorm = 0.5811, lr_0 = 2.4105e-04
Loss = 9.0492e-02, PNorm = 82.1122, GNorm = 0.7714, lr_0 = 2.4089e-04
Loss = 9.6483e-02, PNorm = 82.1170, GNorm = 0.5862, lr_0 = 2.4072e-04
Loss = 8.0027e-02, PNorm = 82.1208, GNorm = 0.5832, lr_0 = 2.4056e-04
Loss = 8.8367e-02, PNorm = 82.1238, GNorm = 0.6502, lr_0 = 2.4039e-04
Loss = 9.1994e-02, PNorm = 82.1290, GNorm = 0.6416, lr_0 = 2.4023e-04
Loss = 1.0244e-01, PNorm = 82.1328, GNorm = 0.6702, lr_0 = 2.4006e-04
Loss = 1.0390e-01, PNorm = 82.1382, GNorm = 1.0477, lr_0 = 2.3990e-04
Loss = 9.5431e-02, PNorm = 82.1429, GNorm = 0.8145, lr_0 = 2.3974e-04
Loss = 9.3378e-02, PNorm = 82.1461, GNorm = 0.5617, lr_0 = 2.3957e-04
Loss = 8.3766e-02, PNorm = 82.1520, GNorm = 0.7358, lr_0 = 2.3941e-04
Loss = 9.6423e-02, PNorm = 82.1581, GNorm = 0.5700, lr_0 = 2.3924e-04
Loss = 1.0787e-01, PNorm = 82.1606, GNorm = 0.7852, lr_0 = 2.3908e-04
Loss = 9.4507e-02, PNorm = 82.1616, GNorm = 0.6350, lr_0 = 2.3892e-04
Loss = 8.1716e-02, PNorm = 82.1622, GNorm = 0.7106, lr_0 = 2.3875e-04
Loss = 9.6505e-02, PNorm = 82.1644, GNorm = 0.6232, lr_0 = 2.3859e-04
Loss = 1.0352e-01, PNorm = 82.1686, GNorm = 0.8563, lr_0 = 2.3842e-04
Loss = 1.1406e-01, PNorm = 82.1715, GNorm = 0.7327, lr_0 = 2.3826e-04
Loss = 9.1744e-02, PNorm = 82.1753, GNorm = 0.5618, lr_0 = 2.3810e-04
Loss = 8.8252e-02, PNorm = 82.1782, GNorm = 0.7737, lr_0 = 2.3794e-04
Loss = 1.0341e-01, PNorm = 82.1820, GNorm = 0.6086, lr_0 = 2.3777e-04
Loss = 1.0148e-01, PNorm = 82.1873, GNorm = 0.5251, lr_0 = 2.3761e-04
Loss = 8.9190e-02, PNorm = 82.1909, GNorm = 0.6225, lr_0 = 2.3745e-04
Loss = 8.8295e-02, PNorm = 82.1946, GNorm = 0.6452, lr_0 = 2.3728e-04
Loss = 1.2605e-01, PNorm = 82.1969, GNorm = 0.6314, lr_0 = 2.3712e-04
Loss = 9.2488e-02, PNorm = 82.1988, GNorm = 0.8703, lr_0 = 2.3696e-04
Loss = 1.0008e-01, PNorm = 82.2055, GNorm = 0.8252, lr_0 = 2.3680e-04
Loss = 1.0507e-01, PNorm = 82.2131, GNorm = 0.6631, lr_0 = 2.3663e-04
Loss = 9.4557e-02, PNorm = 82.2213, GNorm = 0.8085, lr_0 = 2.3647e-04
Loss = 9.0966e-02, PNorm = 82.2292, GNorm = 0.7448, lr_0 = 2.3631e-04
Loss = 9.1391e-02, PNorm = 82.2352, GNorm = 0.6393, lr_0 = 2.3615e-04
Loss = 9.8176e-02, PNorm = 82.2386, GNorm = 0.7546, lr_0 = 2.3599e-04
Loss = 9.8863e-02, PNorm = 82.2415, GNorm = 0.7618, lr_0 = 2.3582e-04
Loss = 1.0413e-01, PNorm = 82.2431, GNorm = 0.5594, lr_0 = 2.3566e-04
Loss = 8.6596e-02, PNorm = 82.2451, GNorm = 0.6142, lr_0 = 2.3550e-04
Loss = 1.0570e-01, PNorm = 82.2528, GNorm = 0.6971, lr_0 = 2.3534e-04
Loss = 9.6383e-02, PNorm = 82.2595, GNorm = 0.7293, lr_0 = 2.3518e-04
Loss = 9.5391e-02, PNorm = 82.2647, GNorm = 0.6594, lr_0 = 2.3502e-04
Loss = 9.6107e-02, PNorm = 82.2683, GNorm = 0.7932, lr_0 = 2.3486e-04
Loss = 9.2388e-02, PNorm = 82.2700, GNorm = 0.7367, lr_0 = 2.3470e-04
Loss = 8.1507e-02, PNorm = 82.2742, GNorm = 0.8554, lr_0 = 2.3454e-04
Loss = 8.7558e-02, PNorm = 82.2777, GNorm = 0.6216, lr_0 = 2.3437e-04
Loss = 8.8198e-02, PNorm = 82.2848, GNorm = 0.6905, lr_0 = 2.3421e-04
Loss = 9.9299e-02, PNorm = 82.2908, GNorm = 0.6882, lr_0 = 2.3405e-04
Loss = 8.9511e-02, PNorm = 82.2991, GNorm = 0.6411, lr_0 = 2.3389e-04
Loss = 8.4938e-02, PNorm = 82.3033, GNorm = 0.5729, lr_0 = 2.3373e-04
Loss = 8.6114e-02, PNorm = 82.3054, GNorm = 0.6924, lr_0 = 2.3357e-04
Loss = 8.6296e-02, PNorm = 82.3094, GNorm = 0.6078, lr_0 = 2.3341e-04
Loss = 1.1076e-01, PNorm = 82.3157, GNorm = 0.6904, lr_0 = 2.3325e-04
Loss = 9.0467e-02, PNorm = 82.3214, GNorm = 0.6579, lr_0 = 2.3309e-04
Loss = 9.3638e-02, PNorm = 82.3254, GNorm = 0.6793, lr_0 = 2.3293e-04
Loss = 8.2819e-02, PNorm = 82.3280, GNorm = 0.7041, lr_0 = 2.3277e-04
Loss = 9.6981e-02, PNorm = 82.3306, GNorm = 0.9051, lr_0 = 2.3261e-04
Loss = 1.2332e-01, PNorm = 82.3373, GNorm = 0.7182, lr_0 = 2.3246e-04
Loss = 9.7215e-02, PNorm = 82.3420, GNorm = 0.5801, lr_0 = 2.3230e-04
Loss = 9.8566e-02, PNorm = 82.3477, GNorm = 0.4719, lr_0 = 2.3214e-04
Loss = 9.3958e-02, PNorm = 82.3547, GNorm = 0.7339, lr_0 = 2.3198e-04
Loss = 9.6660e-02, PNorm = 82.3591, GNorm = 0.5271, lr_0 = 2.3182e-04
Loss = 9.3563e-02, PNorm = 82.3645, GNorm = 0.5346, lr_0 = 2.3166e-04
Loss = 9.5601e-02, PNorm = 82.3690, GNorm = 0.5833, lr_0 = 2.3150e-04
Loss = 1.0468e-01, PNorm = 82.3740, GNorm = 0.6692, lr_0 = 2.3134e-04
Loss = 9.6037e-02, PNorm = 82.3770, GNorm = 0.5251, lr_0 = 2.3118e-04
Loss = 1.0011e-01, PNorm = 82.3811, GNorm = 0.6413, lr_0 = 2.3103e-04
Loss = 8.6261e-02, PNorm = 82.3872, GNorm = 0.5736, lr_0 = 2.3087e-04
Loss = 9.4668e-02, PNorm = 82.3948, GNorm = 0.6331, lr_0 = 2.3071e-04
Loss = 9.1308e-02, PNorm = 82.4000, GNorm = 0.4317, lr_0 = 2.3055e-04
Loss = 8.4403e-02, PNorm = 82.4042, GNorm = 0.7030, lr_0 = 2.3039e-04
Loss = 8.0222e-02, PNorm = 82.4062, GNorm = 0.8381, lr_0 = 2.3024e-04
Loss = 1.1777e-01, PNorm = 82.4086, GNorm = 0.6047, lr_0 = 2.3008e-04
Loss = 1.0167e-01, PNorm = 82.4131, GNorm = 0.6516, lr_0 = 2.2992e-04
Loss = 9.7887e-02, PNorm = 82.4196, GNorm = 0.7024, lr_0 = 2.2976e-04
Loss = 1.1748e-01, PNorm = 82.4287, GNorm = 0.7868, lr_0 = 2.2961e-04
Loss = 1.0864e-01, PNorm = 82.4349, GNorm = 0.9529, lr_0 = 2.2945e-04
Loss = 9.3757e-02, PNorm = 82.4413, GNorm = 0.7361, lr_0 = 2.2929e-04
Loss = 9.7594e-02, PNorm = 82.4478, GNorm = 0.8305, lr_0 = 2.2913e-04
Loss = 9.6574e-02, PNorm = 82.4503, GNorm = 0.8441, lr_0 = 2.2898e-04
Loss = 9.7821e-02, PNorm = 82.4513, GNorm = 0.4966, lr_0 = 2.2882e-04
Loss = 8.6567e-02, PNorm = 82.4519, GNorm = 0.5686, lr_0 = 2.2866e-04
Loss = 1.0114e-01, PNorm = 82.4587, GNorm = 0.7035, lr_0 = 2.2851e-04
Loss = 8.4267e-02, PNorm = 82.4629, GNorm = 0.7843, lr_0 = 2.2835e-04
Loss = 8.8415e-02, PNorm = 82.4660, GNorm = 0.5271, lr_0 = 2.2819e-04
Loss = 1.0300e-01, PNorm = 82.4746, GNorm = 0.7065, lr_0 = 2.2804e-04
Loss = 8.9377e-02, PNorm = 82.4816, GNorm = 0.5946, lr_0 = 2.2788e-04
Loss = 9.3253e-02, PNorm = 82.4845, GNorm = 0.5021, lr_0 = 2.2773e-04
Loss = 1.0596e-01, PNorm = 82.4859, GNorm = 0.5932, lr_0 = 2.2757e-04
Validation mae = 0.226636
Epoch 20
Loss = 9.1777e-02, PNorm = 82.4918, GNorm = 0.5813, lr_0 = 2.2741e-04
Loss = 8.7813e-02, PNorm = 82.4998, GNorm = 0.6892, lr_0 = 2.2726e-04
Loss = 8.7623e-02, PNorm = 82.5051, GNorm = 0.9749, lr_0 = 2.2710e-04
Loss = 9.4564e-02, PNorm = 82.5105, GNorm = 1.1850, lr_0 = 2.2695e-04
Loss = 8.5839e-02, PNorm = 82.5148, GNorm = 0.9151, lr_0 = 2.2679e-04
Loss = 9.4785e-02, PNorm = 82.5163, GNorm = 0.6466, lr_0 = 2.2664e-04
Loss = 7.7343e-02, PNorm = 82.5206, GNorm = 0.6614, lr_0 = 2.2648e-04
Loss = 1.1457e-01, PNorm = 82.5268, GNorm = 0.7694, lr_0 = 2.2632e-04
Loss = 8.9394e-02, PNorm = 82.5291, GNorm = 0.7756, lr_0 = 2.2617e-04
Loss = 9.3707e-02, PNorm = 82.5311, GNorm = 0.6201, lr_0 = 2.2601e-04
Loss = 9.2641e-02, PNorm = 82.5332, GNorm = 0.9407, lr_0 = 2.2586e-04
Loss = 8.2528e-02, PNorm = 82.5387, GNorm = 0.5447, lr_0 = 2.2571e-04
Loss = 8.5943e-02, PNorm = 82.5407, GNorm = 0.8084, lr_0 = 2.2555e-04
Loss = 1.0346e-01, PNorm = 82.5453, GNorm = 0.5996, lr_0 = 2.2540e-04
Loss = 9.1867e-02, PNorm = 82.5489, GNorm = 1.0145, lr_0 = 2.2524e-04
Loss = 8.4099e-02, PNorm = 82.5528, GNorm = 0.6635, lr_0 = 2.2509e-04
Loss = 8.7964e-02, PNorm = 82.5556, GNorm = 0.5885, lr_0 = 2.2493e-04
Loss = 8.0490e-02, PNorm = 82.5585, GNorm = 0.6311, lr_0 = 2.2478e-04
Loss = 9.1376e-02, PNorm = 82.5640, GNorm = 0.6270, lr_0 = 2.2463e-04
Loss = 9.4483e-02, PNorm = 82.5662, GNorm = 0.5103, lr_0 = 2.2447e-04
Loss = 1.0110e-01, PNorm = 82.5701, GNorm = 0.5590, lr_0 = 2.2432e-04
Loss = 8.3110e-02, PNorm = 82.5709, GNorm = 0.5849, lr_0 = 2.2416e-04
Loss = 9.3284e-02, PNorm = 82.5762, GNorm = 0.5152, lr_0 = 2.2401e-04
Loss = 8.8365e-02, PNorm = 82.5823, GNorm = 0.7292, lr_0 = 2.2386e-04
Loss = 8.8239e-02, PNorm = 82.5850, GNorm = 0.6336, lr_0 = 2.2370e-04
Loss = 9.4514e-02, PNorm = 82.5902, GNorm = 0.8578, lr_0 = 2.2355e-04
Loss = 9.9921e-02, PNorm = 82.5959, GNorm = 0.7067, lr_0 = 2.2340e-04
Loss = 8.8634e-02, PNorm = 82.6018, GNorm = 0.9488, lr_0 = 2.2324e-04
Loss = 9.0714e-02, PNorm = 82.6076, GNorm = 0.9189, lr_0 = 2.2309e-04
Loss = 9.5541e-02, PNorm = 82.6121, GNorm = 0.6859, lr_0 = 2.2294e-04
Loss = 8.5072e-02, PNorm = 82.6184, GNorm = 0.7495, lr_0 = 2.2279e-04
Loss = 8.3912e-02, PNorm = 82.6244, GNorm = 0.7870, lr_0 = 2.2263e-04
Loss = 9.3884e-02, PNorm = 82.6294, GNorm = 0.6536, lr_0 = 2.2248e-04
Loss = 8.7535e-02, PNorm = 82.6349, GNorm = 0.5935, lr_0 = 2.2233e-04
Loss = 9.0426e-02, PNorm = 82.6380, GNorm = 0.6461, lr_0 = 2.2218e-04
Loss = 9.7224e-02, PNorm = 82.6408, GNorm = 0.6036, lr_0 = 2.2202e-04
Loss = 7.9200e-02, PNorm = 82.6452, GNorm = 0.4887, lr_0 = 2.2187e-04
Loss = 8.7233e-02, PNorm = 82.6479, GNorm = 0.6355, lr_0 = 2.2172e-04
Loss = 1.0050e-01, PNorm = 82.6503, GNorm = 0.7016, lr_0 = 2.2157e-04
Loss = 1.0009e-01, PNorm = 82.6535, GNorm = 0.5422, lr_0 = 2.2142e-04
Loss = 9.9026e-02, PNorm = 82.6576, GNorm = 0.6294, lr_0 = 2.2126e-04
Loss = 8.6026e-02, PNorm = 82.6610, GNorm = 0.6194, lr_0 = 2.2111e-04
Loss = 8.4981e-02, PNorm = 82.6640, GNorm = 0.5283, lr_0 = 2.2096e-04
Loss = 9.6061e-02, PNorm = 82.6660, GNorm = 0.8819, lr_0 = 2.2081e-04
Loss = 8.7249e-02, PNorm = 82.6706, GNorm = 0.4547, lr_0 = 2.2066e-04
Loss = 9.8917e-02, PNorm = 82.6747, GNorm = 1.1496, lr_0 = 2.2051e-04
Loss = 8.9805e-02, PNorm = 82.6806, GNorm = 0.7045, lr_0 = 2.2036e-04
Loss = 8.9464e-02, PNorm = 82.6859, GNorm = 0.5024, lr_0 = 2.2021e-04
Loss = 8.5506e-02, PNorm = 82.6896, GNorm = 0.7649, lr_0 = 2.2005e-04
Loss = 8.6124e-02, PNorm = 82.6916, GNorm = 0.7585, lr_0 = 2.1990e-04
Loss = 8.9148e-02, PNorm = 82.6949, GNorm = 0.7309, lr_0 = 2.1975e-04
Loss = 8.1960e-02, PNorm = 82.7006, GNorm = 0.6708, lr_0 = 2.1960e-04
Loss = 9.1623e-02, PNorm = 82.7049, GNorm = 0.7916, lr_0 = 2.1945e-04
Loss = 9.0213e-02, PNorm = 82.7075, GNorm = 0.5364, lr_0 = 2.1930e-04
Loss = 9.0558e-02, PNorm = 82.7112, GNorm = 0.6336, lr_0 = 2.1915e-04
Loss = 8.3318e-02, PNorm = 82.7162, GNorm = 0.7298, lr_0 = 2.1900e-04
Loss = 8.7301e-02, PNorm = 82.7194, GNorm = 0.6237, lr_0 = 2.1885e-04
Loss = 1.1017e-01, PNorm = 82.7245, GNorm = 0.9138, lr_0 = 2.1870e-04
Loss = 8.4794e-02, PNorm = 82.7278, GNorm = 0.4837, lr_0 = 2.1855e-04
Loss = 9.6080e-02, PNorm = 82.7313, GNorm = 0.6635, lr_0 = 2.1840e-04
Loss = 9.2448e-02, PNorm = 82.7354, GNorm = 0.6838, lr_0 = 2.1825e-04
Loss = 8.6753e-02, PNorm = 82.7378, GNorm = 0.5759, lr_0 = 2.1810e-04
Loss = 9.7733e-02, PNorm = 82.7438, GNorm = 0.5967, lr_0 = 2.1795e-04
Loss = 9.6778e-02, PNorm = 82.7488, GNorm = 0.5814, lr_0 = 2.1780e-04
Loss = 8.0895e-02, PNorm = 82.7528, GNorm = 0.7398, lr_0 = 2.1765e-04
Loss = 8.1727e-02, PNorm = 82.7571, GNorm = 0.5289, lr_0 = 2.1751e-04
Loss = 8.4799e-02, PNorm = 82.7610, GNorm = 0.5115, lr_0 = 2.1736e-04
Loss = 7.7615e-02, PNorm = 82.7653, GNorm = 0.5614, lr_0 = 2.1721e-04
Loss = 8.8036e-02, PNorm = 82.7684, GNorm = 0.5865, lr_0 = 2.1706e-04
Loss = 8.3338e-02, PNorm = 82.7701, GNorm = 0.6792, lr_0 = 2.1691e-04
Loss = 8.9030e-02, PNorm = 82.7728, GNorm = 0.6332, lr_0 = 2.1676e-04
Loss = 9.9089e-02, PNorm = 82.7748, GNorm = 0.5581, lr_0 = 2.1661e-04
Loss = 8.9484e-02, PNorm = 82.7799, GNorm = 0.4800, lr_0 = 2.1646e-04
Loss = 8.9301e-02, PNorm = 82.7866, GNorm = 0.6742, lr_0 = 2.1632e-04
Loss = 9.1324e-02, PNorm = 82.7918, GNorm = 0.6408, lr_0 = 2.1617e-04
Loss = 9.5009e-02, PNorm = 82.7989, GNorm = 0.7012, lr_0 = 2.1602e-04
Loss = 9.0416e-02, PNorm = 82.8048, GNorm = 0.7611, lr_0 = 2.1587e-04
Loss = 8.9383e-02, PNorm = 82.8058, GNorm = 0.5320, lr_0 = 2.1572e-04
Loss = 8.7683e-02, PNorm = 82.8075, GNorm = 0.6888, lr_0 = 2.1558e-04
Loss = 9.2917e-02, PNorm = 82.8075, GNorm = 0.6148, lr_0 = 2.1543e-04
Loss = 9.0861e-02, PNorm = 82.8141, GNorm = 0.7666, lr_0 = 2.1528e-04
Loss = 8.2697e-02, PNorm = 82.8192, GNorm = 0.6223, lr_0 = 2.1513e-04
Loss = 8.7656e-02, PNorm = 82.8240, GNorm = 0.7507, lr_0 = 2.1499e-04
Loss = 9.5140e-02, PNorm = 82.8273, GNorm = 0.5068, lr_0 = 2.1484e-04
Loss = 8.8309e-02, PNorm = 82.8309, GNorm = 0.6273, lr_0 = 2.1469e-04
Loss = 9.0692e-02, PNorm = 82.8336, GNorm = 0.7035, lr_0 = 2.1454e-04
Loss = 8.4793e-02, PNorm = 82.8367, GNorm = 0.6176, lr_0 = 2.1440e-04
Loss = 8.0320e-02, PNorm = 82.8382, GNorm = 0.5174, lr_0 = 2.1425e-04
Loss = 8.6918e-02, PNorm = 82.8437, GNorm = 0.6737, lr_0 = 2.1410e-04
Loss = 8.8228e-02, PNorm = 82.8466, GNorm = 0.7078, lr_0 = 2.1396e-04
Loss = 1.0231e-01, PNorm = 82.8500, GNorm = 0.7110, lr_0 = 2.1381e-04
Loss = 9.5992e-02, PNorm = 82.8552, GNorm = 0.5487, lr_0 = 2.1366e-04
Loss = 9.3177e-02, PNorm = 82.8583, GNorm = 0.7392, lr_0 = 2.1352e-04
Loss = 8.5382e-02, PNorm = 82.8606, GNorm = 0.5402, lr_0 = 2.1337e-04
Loss = 8.9997e-02, PNorm = 82.8651, GNorm = 0.6293, lr_0 = 2.1323e-04
Loss = 9.5230e-02, PNorm = 82.8706, GNorm = 0.5988, lr_0 = 2.1308e-04
Loss = 8.9580e-02, PNorm = 82.8727, GNorm = 0.6735, lr_0 = 2.1293e-04
Loss = 9.5030e-02, PNorm = 82.8741, GNorm = 0.4673, lr_0 = 2.1279e-04
Loss = 9.7568e-02, PNorm = 82.8776, GNorm = 0.8263, lr_0 = 2.1264e-04
Loss = 9.2910e-02, PNorm = 82.8826, GNorm = 0.6948, lr_0 = 2.1250e-04
Loss = 9.9482e-02, PNorm = 82.8882, GNorm = 0.7221, lr_0 = 2.1235e-04
Loss = 9.3160e-02, PNorm = 82.8935, GNorm = 0.6832, lr_0 = 2.1221e-04
Loss = 9.3806e-02, PNorm = 82.8977, GNorm = 0.5539, lr_0 = 2.1206e-04
Loss = 1.0310e-01, PNorm = 82.9011, GNorm = 0.6004, lr_0 = 2.1191e-04
Loss = 8.5614e-02, PNorm = 82.9038, GNorm = 0.6710, lr_0 = 2.1177e-04
Loss = 9.2791e-02, PNorm = 82.9075, GNorm = 0.5664, lr_0 = 2.1162e-04
Loss = 9.4673e-02, PNorm = 82.9099, GNorm = 0.7072, lr_0 = 2.1148e-04
Loss = 1.0998e-01, PNorm = 82.9148, GNorm = 0.7786, lr_0 = 2.1133e-04
Loss = 1.1422e-01, PNorm = 82.9206, GNorm = 0.6764, lr_0 = 2.1119e-04
Loss = 9.9532e-02, PNorm = 82.9256, GNorm = 0.6159, lr_0 = 2.1104e-04
Loss = 9.8614e-02, PNorm = 82.9320, GNorm = 0.6843, lr_0 = 2.1090e-04
Loss = 7.6031e-02, PNorm = 82.9363, GNorm = 0.4849, lr_0 = 2.1076e-04
Loss = 7.7626e-02, PNorm = 82.9388, GNorm = 0.6021, lr_0 = 2.1061e-04
Loss = 7.9352e-02, PNorm = 82.9399, GNorm = 0.6169, lr_0 = 2.1047e-04
Loss = 9.4931e-02, PNorm = 82.9414, GNorm = 0.5577, lr_0 = 2.1032e-04
Loss = 9.8344e-02, PNorm = 82.9440, GNorm = 0.6713, lr_0 = 2.1018e-04
Loss = 9.5922e-02, PNorm = 82.9441, GNorm = 0.7251, lr_0 = 2.1003e-04
Loss = 1.0394e-01, PNorm = 82.9479, GNorm = 0.6909, lr_0 = 2.0989e-04
Loss = 9.0849e-02, PNorm = 82.9528, GNorm = 0.5851, lr_0 = 2.0975e-04
Loss = 1.0470e-01, PNorm = 82.9568, GNorm = 0.7997, lr_0 = 2.0960e-04
Validation mae = 0.226295
Epoch 21
Loss = 8.4614e-02, PNorm = 82.9615, GNorm = 0.6746, lr_0 = 2.0946e-04
Loss = 8.9919e-02, PNorm = 82.9658, GNorm = 0.6211, lr_0 = 2.0932e-04
Loss = 8.5627e-02, PNorm = 82.9690, GNorm = 0.7406, lr_0 = 2.0917e-04
Loss = 7.8714e-02, PNorm = 82.9724, GNorm = 0.5314, lr_0 = 2.0903e-04
Loss = 8.7527e-02, PNorm = 82.9791, GNorm = 0.9136, lr_0 = 2.0889e-04
Loss = 8.4039e-02, PNorm = 82.9813, GNorm = 0.5147, lr_0 = 2.0874e-04
Loss = 8.0637e-02, PNorm = 82.9856, GNorm = 0.5568, lr_0 = 2.0860e-04
Loss = 7.6177e-02, PNorm = 82.9875, GNorm = 0.7714, lr_0 = 2.0846e-04
Loss = 8.0596e-02, PNorm = 82.9904, GNorm = 0.6120, lr_0 = 2.0831e-04
Loss = 8.7932e-02, PNorm = 82.9953, GNorm = 0.7682, lr_0 = 2.0817e-04
Loss = 7.8256e-02, PNorm = 82.9983, GNorm = 0.8210, lr_0 = 2.0803e-04
Loss = 9.6407e-02, PNorm = 83.0012, GNorm = 0.6353, lr_0 = 2.0789e-04
Loss = 8.1316e-02, PNorm = 83.0035, GNorm = 0.8326, lr_0 = 2.0774e-04
Loss = 9.0780e-02, PNorm = 83.0062, GNorm = 0.5776, lr_0 = 2.0760e-04
Loss = 7.6662e-02, PNorm = 83.0113, GNorm = 0.6590, lr_0 = 2.0746e-04
Loss = 8.0506e-02, PNorm = 83.0160, GNorm = 0.6526, lr_0 = 2.0732e-04
Loss = 8.7141e-02, PNorm = 83.0189, GNorm = 0.5422, lr_0 = 2.0718e-04
Loss = 8.0830e-02, PNorm = 83.0238, GNorm = 0.5385, lr_0 = 2.0703e-04
Loss = 7.6083e-02, PNorm = 83.0277, GNorm = 0.5472, lr_0 = 2.0689e-04
Loss = 8.3952e-02, PNorm = 83.0298, GNorm = 0.5910, lr_0 = 2.0675e-04
Loss = 7.8404e-02, PNorm = 83.0381, GNorm = 0.5525, lr_0 = 2.0661e-04
Loss = 8.9221e-02, PNorm = 83.0417, GNorm = 0.7484, lr_0 = 2.0647e-04
Loss = 8.7801e-02, PNorm = 83.0431, GNorm = 0.5748, lr_0 = 2.0633e-04
Loss = 1.0553e-01, PNorm = 83.0481, GNorm = 1.0295, lr_0 = 2.0618e-04
Loss = 8.1020e-02, PNorm = 83.0513, GNorm = 0.6561, lr_0 = 2.0604e-04
Loss = 9.5935e-02, PNorm = 83.0546, GNorm = 0.8268, lr_0 = 2.0590e-04
Loss = 1.0081e-01, PNorm = 83.0627, GNorm = 0.6387, lr_0 = 2.0576e-04
Loss = 8.6408e-02, PNorm = 83.0674, GNorm = 0.7376, lr_0 = 2.0562e-04
Loss = 9.4930e-02, PNorm = 83.0717, GNorm = 0.7959, lr_0 = 2.0548e-04
Loss = 9.0175e-02, PNorm = 83.0778, GNorm = 0.5642, lr_0 = 2.0534e-04
Loss = 9.2630e-02, PNorm = 83.0828, GNorm = 0.4231, lr_0 = 2.0520e-04
Loss = 9.3692e-02, PNorm = 83.0849, GNorm = 0.6802, lr_0 = 2.0506e-04
Loss = 1.0432e-01, PNorm = 83.0895, GNorm = 0.7331, lr_0 = 2.0492e-04
Loss = 9.1658e-02, PNorm = 83.0941, GNorm = 0.7353, lr_0 = 2.0478e-04
Loss = 8.5222e-02, PNorm = 83.0975, GNorm = 0.5649, lr_0 = 2.0464e-04
Loss = 9.2167e-02, PNorm = 83.1032, GNorm = 0.6997, lr_0 = 2.0450e-04
Loss = 8.0108e-02, PNorm = 83.1094, GNorm = 0.5110, lr_0 = 2.0436e-04
Loss = 8.4085e-02, PNorm = 83.1128, GNorm = 0.5813, lr_0 = 2.0422e-04
Loss = 9.6038e-02, PNorm = 83.1164, GNorm = 0.4901, lr_0 = 2.0408e-04
Loss = 1.1182e-01, PNorm = 83.1244, GNorm = 0.7152, lr_0 = 2.0394e-04
Loss = 8.6166e-02, PNorm = 83.1288, GNorm = 0.6125, lr_0 = 2.0380e-04
Loss = 8.2256e-02, PNorm = 83.1315, GNorm = 0.6066, lr_0 = 2.0366e-04
Loss = 9.0994e-02, PNorm = 83.1354, GNorm = 0.7383, lr_0 = 2.0352e-04
Loss = 7.8438e-02, PNorm = 83.1409, GNorm = 0.6901, lr_0 = 2.0338e-04
Loss = 9.7869e-02, PNorm = 83.1439, GNorm = 0.6517, lr_0 = 2.0324e-04
Loss = 9.1037e-02, PNorm = 83.1479, GNorm = 0.6513, lr_0 = 2.0310e-04
Loss = 1.0093e-01, PNorm = 83.1489, GNorm = 0.6186, lr_0 = 2.0296e-04
Loss = 8.5573e-02, PNorm = 83.1521, GNorm = 0.8814, lr_0 = 2.0282e-04
Loss = 8.5112e-02, PNorm = 83.1558, GNorm = 0.6607, lr_0 = 2.0268e-04
Loss = 8.4474e-02, PNorm = 83.1607, GNorm = 0.5766, lr_0 = 2.0254e-04
Loss = 9.0237e-02, PNorm = 83.1630, GNorm = 0.5710, lr_0 = 2.0240e-04
Loss = 8.8096e-02, PNorm = 83.1692, GNorm = 0.5022, lr_0 = 2.0227e-04
Loss = 9.0744e-02, PNorm = 83.1743, GNorm = 0.6843, lr_0 = 2.0213e-04
Loss = 8.0154e-02, PNorm = 83.1757, GNorm = 0.5289, lr_0 = 2.0199e-04
Loss = 8.3725e-02, PNorm = 83.1763, GNorm = 0.6124, lr_0 = 2.0185e-04
Loss = 9.5217e-02, PNorm = 83.1802, GNorm = 0.9762, lr_0 = 2.0171e-04
Loss = 1.0185e-01, PNorm = 83.1861, GNorm = 0.7312, lr_0 = 2.0157e-04
Loss = 9.0177e-02, PNorm = 83.1889, GNorm = 0.6052, lr_0 = 2.0144e-04
Loss = 9.8702e-02, PNorm = 83.1932, GNorm = 0.6483, lr_0 = 2.0130e-04
Loss = 8.7065e-02, PNorm = 83.1972, GNorm = 0.8661, lr_0 = 2.0116e-04
Loss = 1.0070e-01, PNorm = 83.1949, GNorm = 0.9597, lr_0 = 2.0102e-04
Loss = 8.7387e-02, PNorm = 83.1965, GNorm = 0.5690, lr_0 = 2.0088e-04
Loss = 8.8534e-02, PNorm = 83.2006, GNorm = 0.6981, lr_0 = 2.0075e-04
Loss = 9.3834e-02, PNorm = 83.2041, GNorm = 0.6107, lr_0 = 2.0061e-04
Loss = 8.6061e-02, PNorm = 83.2104, GNorm = 0.5934, lr_0 = 2.0047e-04
Loss = 8.7004e-02, PNorm = 83.2148, GNorm = 0.6746, lr_0 = 2.0033e-04
Loss = 9.8014e-02, PNorm = 83.2174, GNorm = 0.9671, lr_0 = 2.0020e-04
Loss = 8.9748e-02, PNorm = 83.2198, GNorm = 0.7190, lr_0 = 2.0006e-04
Loss = 8.1589e-02, PNorm = 83.2219, GNorm = 0.6165, lr_0 = 1.9992e-04
Loss = 9.4448e-02, PNorm = 83.2231, GNorm = 0.4824, lr_0 = 1.9979e-04
Loss = 8.0863e-02, PNorm = 83.2284, GNorm = 0.8839, lr_0 = 1.9965e-04
Loss = 8.9101e-02, PNorm = 83.2325, GNorm = 0.4749, lr_0 = 1.9951e-04
Loss = 1.0686e-01, PNorm = 83.2364, GNorm = 0.8838, lr_0 = 1.9938e-04
Loss = 7.8960e-02, PNorm = 83.2401, GNorm = 0.5336, lr_0 = 1.9924e-04
Loss = 8.7048e-02, PNorm = 83.2433, GNorm = 0.6104, lr_0 = 1.9910e-04
Loss = 7.6126e-02, PNorm = 83.2447, GNorm = 0.6179, lr_0 = 1.9897e-04
Loss = 9.2531e-02, PNorm = 83.2476, GNorm = 0.9333, lr_0 = 1.9883e-04
Loss = 8.7847e-02, PNorm = 83.2522, GNorm = 0.5789, lr_0 = 1.9869e-04
Loss = 9.0804e-02, PNorm = 83.2542, GNorm = 0.8411, lr_0 = 1.9856e-04
Loss = 8.2028e-02, PNorm = 83.2561, GNorm = 0.5609, lr_0 = 1.9842e-04
Loss = 9.9116e-02, PNorm = 83.2587, GNorm = 0.6186, lr_0 = 1.9829e-04
Loss = 8.4097e-02, PNorm = 83.2600, GNorm = 0.4922, lr_0 = 1.9815e-04
Loss = 8.2855e-02, PNorm = 83.2639, GNorm = 0.4154, lr_0 = 1.9801e-04
Loss = 8.0194e-02, PNorm = 83.2696, GNorm = 0.6365, lr_0 = 1.9788e-04
Loss = 8.3551e-02, PNorm = 83.2745, GNorm = 0.5816, lr_0 = 1.9774e-04
Loss = 9.1372e-02, PNorm = 83.2773, GNorm = 0.7433, lr_0 = 1.9761e-04
Loss = 8.4335e-02, PNorm = 83.2795, GNorm = 0.5419, lr_0 = 1.9747e-04
Loss = 8.8998e-02, PNorm = 83.2814, GNorm = 0.6135, lr_0 = 1.9734e-04
Loss = 8.6642e-02, PNorm = 83.2859, GNorm = 0.6964, lr_0 = 1.9720e-04
Loss = 8.6233e-02, PNorm = 83.2880, GNorm = 0.5186, lr_0 = 1.9707e-04
Loss = 8.7098e-02, PNorm = 83.2880, GNorm = 0.5907, lr_0 = 1.9693e-04
Loss = 8.2363e-02, PNorm = 83.2902, GNorm = 0.5775, lr_0 = 1.9680e-04
Loss = 8.3581e-02, PNorm = 83.2919, GNorm = 0.7785, lr_0 = 1.9666e-04
Loss = 9.0898e-02, PNorm = 83.2927, GNorm = 0.6395, lr_0 = 1.9653e-04
Loss = 9.8896e-02, PNorm = 83.2991, GNorm = 0.9984, lr_0 = 1.9639e-04
Loss = 9.8439e-02, PNorm = 83.3035, GNorm = 0.8172, lr_0 = 1.9626e-04
Loss = 8.2043e-02, PNorm = 83.3054, GNorm = 0.7695, lr_0 = 1.9612e-04
Loss = 9.5292e-02, PNorm = 83.3079, GNorm = 0.7664, lr_0 = 1.9599e-04
Loss = 1.1273e-01, PNorm = 83.3114, GNorm = 0.6736, lr_0 = 1.9585e-04
Loss = 9.3310e-02, PNorm = 83.3132, GNorm = 0.7004, lr_0 = 1.9572e-04
Loss = 8.3803e-02, PNorm = 83.3153, GNorm = 0.6147, lr_0 = 1.9559e-04
Loss = 7.7464e-02, PNorm = 83.3174, GNorm = 0.6687, lr_0 = 1.9545e-04
Loss = 8.7019e-02, PNorm = 83.3208, GNorm = 0.5347, lr_0 = 1.9532e-04
Loss = 9.0809e-02, PNorm = 83.3253, GNorm = 1.0707, lr_0 = 1.9518e-04
Loss = 7.8689e-02, PNorm = 83.3301, GNorm = 0.6669, lr_0 = 1.9505e-04
Loss = 9.4997e-02, PNorm = 83.3298, GNorm = 0.7552, lr_0 = 1.9492e-04
Loss = 9.0966e-02, PNorm = 83.3308, GNorm = 0.6489, lr_0 = 1.9478e-04
Loss = 8.9113e-02, PNorm = 83.3335, GNorm = 0.6942, lr_0 = 1.9465e-04
Loss = 1.0041e-01, PNorm = 83.3367, GNorm = 0.5254, lr_0 = 1.9452e-04
Loss = 7.5884e-02, PNorm = 83.3395, GNorm = 0.4451, lr_0 = 1.9438e-04
Loss = 9.7727e-02, PNorm = 83.3429, GNorm = 0.5855, lr_0 = 1.9425e-04
Loss = 8.9031e-02, PNorm = 83.3488, GNorm = 0.8164, lr_0 = 1.9412e-04
Loss = 9.5190e-02, PNorm = 83.3520, GNorm = 0.7344, lr_0 = 1.9398e-04
Loss = 1.0105e-01, PNorm = 83.3547, GNorm = 0.7905, lr_0 = 1.9385e-04
Loss = 9.2809e-02, PNorm = 83.3588, GNorm = 0.6081, lr_0 = 1.9372e-04
Loss = 8.8178e-02, PNorm = 83.3595, GNorm = 0.7203, lr_0 = 1.9359e-04
Loss = 9.7075e-02, PNorm = 83.3610, GNorm = 0.6415, lr_0 = 1.9345e-04
Loss = 1.0941e-01, PNorm = 83.3634, GNorm = 0.6584, lr_0 = 1.9332e-04
Loss = 9.3797e-02, PNorm = 83.3671, GNorm = 0.8530, lr_0 = 1.9319e-04
Loss = 8.8280e-02, PNorm = 83.3686, GNorm = 0.9092, lr_0 = 1.9306e-04
Validation mae = 0.227817
Epoch 22
Loss = 7.7295e-02, PNorm = 83.3712, GNorm = 0.5831, lr_0 = 1.9292e-04
Loss = 6.9863e-02, PNorm = 83.3766, GNorm = 0.5535, lr_0 = 1.9279e-04
Loss = 8.4122e-02, PNorm = 83.3820, GNorm = 0.5419, lr_0 = 1.9266e-04
Loss = 7.8032e-02, PNorm = 83.3871, GNorm = 0.4357, lr_0 = 1.9253e-04
Loss = 8.4815e-02, PNorm = 83.3905, GNorm = 0.5745, lr_0 = 1.9240e-04
Loss = 7.9474e-02, PNorm = 83.3927, GNorm = 0.6374, lr_0 = 1.9226e-04
Loss = 8.7634e-02, PNorm = 83.3961, GNorm = 0.6990, lr_0 = 1.9213e-04
Loss = 8.9897e-02, PNorm = 83.3997, GNorm = 0.8051, lr_0 = 1.9200e-04
Loss = 7.5617e-02, PNorm = 83.4055, GNorm = 0.4586, lr_0 = 1.9187e-04
Loss = 8.1405e-02, PNorm = 83.4087, GNorm = 0.6548, lr_0 = 1.9174e-04
Loss = 7.2405e-02, PNorm = 83.4113, GNorm = 0.5618, lr_0 = 1.9161e-04
Loss = 7.4559e-02, PNorm = 83.4138, GNorm = 0.5674, lr_0 = 1.9148e-04
Loss = 8.3018e-02, PNorm = 83.4170, GNorm = 0.5575, lr_0 = 1.9134e-04
Loss = 8.3864e-02, PNorm = 83.4200, GNorm = 0.8560, lr_0 = 1.9121e-04
Loss = 8.5549e-02, PNorm = 83.4225, GNorm = 0.6257, lr_0 = 1.9108e-04
Loss = 8.1982e-02, PNorm = 83.4215, GNorm = 0.6126, lr_0 = 1.9095e-04
Loss = 7.3533e-02, PNorm = 83.4247, GNorm = 0.6012, lr_0 = 1.9082e-04
Loss = 8.3866e-02, PNorm = 83.4257, GNorm = 0.6438, lr_0 = 1.9069e-04
Loss = 7.9261e-02, PNorm = 83.4293, GNorm = 0.8422, lr_0 = 1.9056e-04
Loss = 9.0871e-02, PNorm = 83.4345, GNorm = 0.6469, lr_0 = 1.9043e-04
Loss = 8.8241e-02, PNorm = 83.4396, GNorm = 0.6626, lr_0 = 1.9030e-04
Loss = 9.5263e-02, PNorm = 83.4450, GNorm = 0.8531, lr_0 = 1.9017e-04
Loss = 8.9061e-02, PNorm = 83.4488, GNorm = 0.7751, lr_0 = 1.9004e-04
Loss = 9.3549e-02, PNorm = 83.4512, GNorm = 0.8218, lr_0 = 1.8991e-04
Loss = 8.1558e-02, PNorm = 83.4509, GNorm = 0.5799, lr_0 = 1.8978e-04
Loss = 8.0005e-02, PNorm = 83.4550, GNorm = 0.7246, lr_0 = 1.8965e-04
Loss = 9.2776e-02, PNorm = 83.4569, GNorm = 0.6542, lr_0 = 1.8952e-04
Loss = 8.6697e-02, PNorm = 83.4602, GNorm = 0.7726, lr_0 = 1.8939e-04
Loss = 9.0927e-02, PNorm = 83.4634, GNorm = 0.7701, lr_0 = 1.8926e-04
Loss = 8.5062e-02, PNorm = 83.4655, GNorm = 0.4989, lr_0 = 1.8913e-04
Loss = 7.8714e-02, PNorm = 83.4710, GNorm = 0.4871, lr_0 = 1.8900e-04
Loss = 1.0256e-01, PNorm = 83.4759, GNorm = 0.8408, lr_0 = 1.8887e-04
Loss = 8.7143e-02, PNorm = 83.4787, GNorm = 0.6264, lr_0 = 1.8874e-04
Loss = 9.3576e-02, PNorm = 83.4815, GNorm = 0.6215, lr_0 = 1.8861e-04
Loss = 8.0572e-02, PNorm = 83.4850, GNorm = 0.5564, lr_0 = 1.8848e-04
Loss = 9.0196e-02, PNorm = 83.4894, GNorm = 0.7131, lr_0 = 1.8835e-04
Loss = 1.0068e-01, PNorm = 83.4929, GNorm = 0.6978, lr_0 = 1.8822e-04
Loss = 7.6732e-02, PNorm = 83.4971, GNorm = 0.6301, lr_0 = 1.8809e-04
Loss = 8.2201e-02, PNorm = 83.5018, GNorm = 0.7988, lr_0 = 1.8797e-04
Loss = 8.3919e-02, PNorm = 83.5081, GNorm = 0.6753, lr_0 = 1.8784e-04
Loss = 8.5368e-02, PNorm = 83.5107, GNorm = 0.6211, lr_0 = 1.8771e-04
Loss = 9.7574e-02, PNorm = 83.5121, GNorm = 0.6802, lr_0 = 1.8758e-04
Loss = 9.1369e-02, PNorm = 83.5158, GNorm = 1.0532, lr_0 = 1.8745e-04
Loss = 9.1495e-02, PNorm = 83.5186, GNorm = 0.8492, lr_0 = 1.8732e-04
Loss = 7.7659e-02, PNorm = 83.5194, GNorm = 0.8782, lr_0 = 1.8719e-04
Loss = 9.2943e-02, PNorm = 83.5247, GNorm = 0.7092, lr_0 = 1.8707e-04
Loss = 8.0432e-02, PNorm = 83.5299, GNorm = 0.6725, lr_0 = 1.8694e-04
Loss = 9.9721e-02, PNorm = 83.5317, GNorm = 0.9170, lr_0 = 1.8681e-04
Loss = 7.8917e-02, PNorm = 83.5320, GNorm = 0.5287, lr_0 = 1.8668e-04
Loss = 9.7220e-02, PNorm = 83.5350, GNorm = 0.7366, lr_0 = 1.8655e-04
Loss = 8.6213e-02, PNorm = 83.5396, GNorm = 0.5249, lr_0 = 1.8643e-04
Loss = 9.2633e-02, PNorm = 83.5439, GNorm = 0.4993, lr_0 = 1.8630e-04
Loss = 8.2780e-02, PNorm = 83.5465, GNorm = 0.7320, lr_0 = 1.8617e-04
Loss = 9.3996e-02, PNorm = 83.5496, GNorm = 0.9505, lr_0 = 1.8604e-04
Loss = 8.0313e-02, PNorm = 83.5531, GNorm = 0.7619, lr_0 = 1.8592e-04
Loss = 1.0068e-01, PNorm = 83.5560, GNorm = 0.6813, lr_0 = 1.8579e-04
Loss = 8.7188e-02, PNorm = 83.5562, GNorm = 0.7063, lr_0 = 1.8566e-04
Loss = 9.4837e-02, PNorm = 83.5620, GNorm = 0.6459, lr_0 = 1.8553e-04
Loss = 8.7767e-02, PNorm = 83.5657, GNorm = 0.9814, lr_0 = 1.8541e-04
Loss = 7.0887e-02, PNorm = 83.5677, GNorm = 0.7575, lr_0 = 1.8528e-04
Loss = 8.5819e-02, PNorm = 83.5683, GNorm = 0.6281, lr_0 = 1.8515e-04
Loss = 9.1245e-02, PNorm = 83.5709, GNorm = 0.5677, lr_0 = 1.8503e-04
Loss = 9.2343e-02, PNorm = 83.5730, GNorm = 0.5904, lr_0 = 1.8490e-04
Loss = 9.0626e-02, PNorm = 83.5752, GNorm = 0.5116, lr_0 = 1.8477e-04
Loss = 9.8516e-02, PNorm = 83.5786, GNorm = 0.7723, lr_0 = 1.8465e-04
Loss = 1.0310e-01, PNorm = 83.5804, GNorm = 0.7197, lr_0 = 1.8452e-04
Loss = 7.9347e-02, PNorm = 83.5831, GNorm = 0.6105, lr_0 = 1.8439e-04
Loss = 8.3581e-02, PNorm = 83.5867, GNorm = 0.6315, lr_0 = 1.8427e-04
Loss = 8.7851e-02, PNorm = 83.5888, GNorm = 0.7557, lr_0 = 1.8414e-04
Loss = 8.9026e-02, PNorm = 83.5911, GNorm = 0.6386, lr_0 = 1.8401e-04
Loss = 8.6497e-02, PNorm = 83.5936, GNorm = 0.5391, lr_0 = 1.8389e-04
Loss = 8.2671e-02, PNorm = 83.5958, GNorm = 0.5809, lr_0 = 1.8376e-04
Loss = 8.6897e-02, PNorm = 83.5993, GNorm = 0.5104, lr_0 = 1.8364e-04
Loss = 8.6906e-02, PNorm = 83.6022, GNorm = 0.7178, lr_0 = 1.8351e-04
Loss = 9.6506e-02, PNorm = 83.6041, GNorm = 0.5554, lr_0 = 1.8338e-04
Loss = 8.7459e-02, PNorm = 83.6063, GNorm = 0.6103, lr_0 = 1.8326e-04
Loss = 9.0574e-02, PNorm = 83.6075, GNorm = 0.6184, lr_0 = 1.8313e-04
Loss = 8.8255e-02, PNorm = 83.6104, GNorm = 0.7353, lr_0 = 1.8301e-04
Loss = 8.3967e-02, PNorm = 83.6143, GNorm = 0.6566, lr_0 = 1.8288e-04
Loss = 8.6679e-02, PNorm = 83.6159, GNorm = 0.6578, lr_0 = 1.8276e-04
Loss = 8.4007e-02, PNorm = 83.6200, GNorm = 0.5634, lr_0 = 1.8263e-04
Loss = 8.0973e-02, PNorm = 83.6243, GNorm = 0.6081, lr_0 = 1.8251e-04
Loss = 9.4676e-02, PNorm = 83.6288, GNorm = 0.8640, lr_0 = 1.8238e-04
Loss = 7.8841e-02, PNorm = 83.6324, GNorm = 0.5314, lr_0 = 1.8226e-04
Loss = 8.6610e-02, PNorm = 83.6358, GNorm = 0.8504, lr_0 = 1.8213e-04
Loss = 8.1000e-02, PNorm = 83.6422, GNorm = 0.7214, lr_0 = 1.8201e-04
Loss = 8.4518e-02, PNorm = 83.6456, GNorm = 0.7968, lr_0 = 1.8188e-04
Loss = 8.8526e-02, PNorm = 83.6460, GNorm = 0.7436, lr_0 = 1.8176e-04
Loss = 9.0923e-02, PNorm = 83.6484, GNorm = 0.7442, lr_0 = 1.8163e-04
Loss = 8.4067e-02, PNorm = 83.6558, GNorm = 0.5892, lr_0 = 1.8151e-04
Loss = 8.7059e-02, PNorm = 83.6602, GNorm = 0.5613, lr_0 = 1.8138e-04
Loss = 8.0225e-02, PNorm = 83.6630, GNorm = 0.5897, lr_0 = 1.8126e-04
Loss = 8.2292e-02, PNorm = 83.6677, GNorm = 0.7787, lr_0 = 1.8114e-04
Loss = 9.2709e-02, PNorm = 83.6696, GNorm = 0.8747, lr_0 = 1.8101e-04
Loss = 7.9774e-02, PNorm = 83.6712, GNorm = 0.4260, lr_0 = 1.8089e-04
Loss = 9.5376e-02, PNorm = 83.6745, GNorm = 0.5778, lr_0 = 1.8076e-04
Loss = 9.6856e-02, PNorm = 83.6754, GNorm = 0.5657, lr_0 = 1.8064e-04
Loss = 8.5023e-02, PNorm = 83.6760, GNorm = 0.6384, lr_0 = 1.8052e-04
Loss = 8.5075e-02, PNorm = 83.6787, GNorm = 0.6259, lr_0 = 1.8039e-04
Loss = 9.1931e-02, PNorm = 83.6814, GNorm = 0.9045, lr_0 = 1.8027e-04
Loss = 8.8819e-02, PNorm = 83.6828, GNorm = 0.4614, lr_0 = 1.8015e-04
Loss = 9.9798e-02, PNorm = 83.6862, GNorm = 0.6348, lr_0 = 1.8002e-04
Loss = 8.2296e-02, PNorm = 83.6883, GNorm = 0.5541, lr_0 = 1.7990e-04
Loss = 9.8725e-02, PNorm = 83.6928, GNorm = 0.7977, lr_0 = 1.7978e-04
Loss = 9.1813e-02, PNorm = 83.6954, GNorm = 0.5035, lr_0 = 1.7965e-04
Loss = 7.7644e-02, PNorm = 83.7017, GNorm = 0.5926, lr_0 = 1.7953e-04
Loss = 8.9640e-02, PNorm = 83.7050, GNorm = 0.4814, lr_0 = 1.7941e-04
Loss = 8.8240e-02, PNorm = 83.7050, GNorm = 0.6685, lr_0 = 1.7928e-04
Loss = 7.9471e-02, PNorm = 83.7078, GNorm = 0.4989, lr_0 = 1.7916e-04
Loss = 8.8121e-02, PNorm = 83.7110, GNorm = 0.8784, lr_0 = 1.7904e-04
Loss = 8.6010e-02, PNorm = 83.7136, GNorm = 0.6848, lr_0 = 1.7892e-04
Loss = 8.1500e-02, PNorm = 83.7169, GNorm = 0.6499, lr_0 = 1.7879e-04
Loss = 8.8473e-02, PNorm = 83.7198, GNorm = 0.5962, lr_0 = 1.7867e-04
Loss = 7.4351e-02, PNorm = 83.7223, GNorm = 0.6152, lr_0 = 1.7855e-04
Loss = 9.3463e-02, PNorm = 83.7254, GNorm = 0.6079, lr_0 = 1.7843e-04
Loss = 7.3711e-02, PNorm = 83.7277, GNorm = 0.5246, lr_0 = 1.7830e-04
Loss = 7.8750e-02, PNorm = 83.7277, GNorm = 0.5066, lr_0 = 1.7818e-04
Loss = 8.4999e-02, PNorm = 83.7306, GNorm = 0.7000, lr_0 = 1.7806e-04
Loss = 8.0031e-02, PNorm = 83.7340, GNorm = 0.6347, lr_0 = 1.7794e-04
Loss = 8.1507e-02, PNorm = 83.7364, GNorm = 0.5268, lr_0 = 1.7782e-04
Validation mae = 0.226168
Epoch 23
Loss = 9.1648e-02, PNorm = 83.7396, GNorm = 0.5149, lr_0 = 1.7769e-04
Loss = 8.7908e-02, PNorm = 83.7445, GNorm = 0.7691, lr_0 = 1.7757e-04
Loss = 8.7556e-02, PNorm = 83.7495, GNorm = 0.4455, lr_0 = 1.7745e-04
Loss = 8.2645e-02, PNorm = 83.7533, GNorm = 0.5976, lr_0 = 1.7733e-04
Loss = 8.0150e-02, PNorm = 83.7555, GNorm = 0.6938, lr_0 = 1.7721e-04
Loss = 8.5300e-02, PNorm = 83.7579, GNorm = 0.6745, lr_0 = 1.7709e-04
Loss = 7.2067e-02, PNorm = 83.7611, GNorm = 0.7193, lr_0 = 1.7696e-04
Loss = 9.0062e-02, PNorm = 83.7642, GNorm = 0.7146, lr_0 = 1.7684e-04
Loss = 8.0929e-02, PNorm = 83.7691, GNorm = 0.5227, lr_0 = 1.7672e-04
Loss = 8.6362e-02, PNorm = 83.7748, GNorm = 0.6245, lr_0 = 1.7660e-04
Loss = 8.5128e-02, PNorm = 83.7784, GNorm = 0.6568, lr_0 = 1.7648e-04
Loss = 7.8062e-02, PNorm = 83.7803, GNorm = 0.7473, lr_0 = 1.7636e-04
Loss = 6.9815e-02, PNorm = 83.7831, GNorm = 0.5580, lr_0 = 1.7624e-04
Loss = 7.2525e-02, PNorm = 83.7867, GNorm = 0.5788, lr_0 = 1.7612e-04
Loss = 7.9645e-02, PNorm = 83.7870, GNorm = 0.7169, lr_0 = 1.7600e-04
Loss = 7.9201e-02, PNorm = 83.7888, GNorm = 0.5238, lr_0 = 1.7588e-04
Loss = 8.9005e-02, PNorm = 83.7931, GNorm = 0.5151, lr_0 = 1.7576e-04
Loss = 7.6578e-02, PNorm = 83.7948, GNorm = 0.7271, lr_0 = 1.7564e-04
Loss = 9.2887e-02, PNorm = 83.7977, GNorm = 0.5265, lr_0 = 1.7552e-04
Loss = 8.3427e-02, PNorm = 83.8020, GNorm = 0.7326, lr_0 = 1.7540e-04
Loss = 8.3446e-02, PNorm = 83.8061, GNorm = 0.8032, lr_0 = 1.7528e-04
Loss = 6.7346e-02, PNorm = 83.8089, GNorm = 0.5811, lr_0 = 1.7516e-04
Loss = 6.9016e-02, PNorm = 83.8156, GNorm = 0.5829, lr_0 = 1.7504e-04
Loss = 8.7549e-02, PNorm = 83.8188, GNorm = 0.6512, lr_0 = 1.7492e-04
Loss = 8.1810e-02, PNorm = 83.8218, GNorm = 0.8175, lr_0 = 1.7480e-04
Loss = 7.8124e-02, PNorm = 83.8257, GNorm = 0.5701, lr_0 = 1.7468e-04
Loss = 8.7124e-02, PNorm = 83.8288, GNorm = 0.4390, lr_0 = 1.7456e-04
Loss = 9.1253e-02, PNorm = 83.8308, GNorm = 0.6891, lr_0 = 1.7444e-04
Loss = 8.0679e-02, PNorm = 83.8333, GNorm = 0.7362, lr_0 = 1.7432e-04
Loss = 7.7819e-02, PNorm = 83.8356, GNorm = 0.6338, lr_0 = 1.7420e-04
Loss = 8.0139e-02, PNorm = 83.8380, GNorm = 1.1437, lr_0 = 1.7408e-04
Loss = 7.9715e-02, PNorm = 83.8413, GNorm = 0.7412, lr_0 = 1.7396e-04
Loss = 7.5477e-02, PNorm = 83.8440, GNorm = 0.5725, lr_0 = 1.7384e-04
Loss = 9.2435e-02, PNorm = 83.8463, GNorm = 0.6001, lr_0 = 1.7372e-04
Loss = 8.3089e-02, PNorm = 83.8494, GNorm = 0.6795, lr_0 = 1.7360e-04
Loss = 8.7901e-02, PNorm = 83.8513, GNorm = 0.5676, lr_0 = 1.7348e-04
Loss = 9.2977e-02, PNorm = 83.8555, GNorm = 0.5528, lr_0 = 1.7336e-04
Loss = 8.8691e-02, PNorm = 83.8575, GNorm = 0.7963, lr_0 = 1.7325e-04
Loss = 8.7607e-02, PNorm = 83.8601, GNorm = 0.8751, lr_0 = 1.7313e-04
Loss = 6.9532e-02, PNorm = 83.8635, GNorm = 0.6761, lr_0 = 1.7301e-04
Loss = 8.7729e-02, PNorm = 83.8672, GNorm = 0.6407, lr_0 = 1.7289e-04
Loss = 8.5450e-02, PNorm = 83.8697, GNorm = 0.5560, lr_0 = 1.7277e-04
Loss = 7.5764e-02, PNorm = 83.8758, GNorm = 0.7794, lr_0 = 1.7265e-04
Loss = 8.0946e-02, PNorm = 83.8792, GNorm = 0.7773, lr_0 = 1.7253e-04
Loss = 7.8316e-02, PNorm = 83.8830, GNorm = 0.5412, lr_0 = 1.7242e-04
Loss = 8.4471e-02, PNorm = 83.8863, GNorm = 0.5878, lr_0 = 1.7230e-04
Loss = 7.6241e-02, PNorm = 83.8886, GNorm = 0.8025, lr_0 = 1.7218e-04
Loss = 8.0597e-02, PNorm = 83.8915, GNorm = 0.5767, lr_0 = 1.7206e-04
Loss = 9.2931e-02, PNorm = 83.8949, GNorm = 0.7050, lr_0 = 1.7194e-04
Loss = 8.4092e-02, PNorm = 83.8966, GNorm = 0.7168, lr_0 = 1.7183e-04
Loss = 7.5448e-02, PNorm = 83.8981, GNorm = 0.6029, lr_0 = 1.7171e-04
Loss = 7.7865e-02, PNorm = 83.9011, GNorm = 0.5584, lr_0 = 1.7159e-04
Loss = 8.3734e-02, PNorm = 83.9044, GNorm = 0.6102, lr_0 = 1.7147e-04
Loss = 9.5687e-02, PNorm = 83.9079, GNorm = 0.7857, lr_0 = 1.7136e-04
Loss = 8.0488e-02, PNorm = 83.9096, GNorm = 0.6451, lr_0 = 1.7124e-04
Loss = 7.7759e-02, PNorm = 83.9121, GNorm = 0.6202, lr_0 = 1.7112e-04
Loss = 9.1455e-02, PNorm = 83.9156, GNorm = 0.6240, lr_0 = 1.7100e-04
Loss = 9.0189e-02, PNorm = 83.9172, GNorm = 0.5514, lr_0 = 1.7089e-04
Loss = 7.6811e-02, PNorm = 83.9208, GNorm = 0.5035, lr_0 = 1.7077e-04
Loss = 9.0084e-02, PNorm = 83.9231, GNorm = 0.6219, lr_0 = 1.7065e-04
Loss = 8.3947e-02, PNorm = 83.9267, GNorm = 0.8841, lr_0 = 1.7054e-04
Loss = 8.6231e-02, PNorm = 83.9280, GNorm = 0.6418, lr_0 = 1.7042e-04
Loss = 8.8011e-02, PNorm = 83.9299, GNorm = 0.7096, lr_0 = 1.7030e-04
Loss = 9.2254e-02, PNorm = 83.9310, GNorm = 0.6865, lr_0 = 1.7019e-04
Loss = 8.5995e-02, PNorm = 83.9336, GNorm = 0.7206, lr_0 = 1.7007e-04
Loss = 9.0452e-02, PNorm = 83.9361, GNorm = 0.6247, lr_0 = 1.6995e-04
Loss = 8.4023e-02, PNorm = 83.9393, GNorm = 0.6243, lr_0 = 1.6984e-04
Loss = 7.4228e-02, PNorm = 83.9421, GNorm = 0.3957, lr_0 = 1.6972e-04
Loss = 8.3368e-02, PNorm = 83.9444, GNorm = 0.5943, lr_0 = 1.6960e-04
Loss = 8.1881e-02, PNorm = 83.9499, GNorm = 0.7873, lr_0 = 1.6949e-04
Loss = 7.6719e-02, PNorm = 83.9536, GNorm = 0.8727, lr_0 = 1.6937e-04
Loss = 8.7547e-02, PNorm = 83.9575, GNorm = 0.6200, lr_0 = 1.6926e-04
Loss = 8.3499e-02, PNorm = 83.9612, GNorm = 0.6406, lr_0 = 1.6914e-04
Loss = 8.5120e-02, PNorm = 83.9644, GNorm = 0.7451, lr_0 = 1.6902e-04
Loss = 9.3008e-02, PNorm = 83.9676, GNorm = 0.5928, lr_0 = 1.6891e-04
Loss = 7.7260e-02, PNorm = 83.9686, GNorm = 0.7815, lr_0 = 1.6879e-04
Loss = 8.7372e-02, PNorm = 83.9707, GNorm = 0.5496, lr_0 = 1.6868e-04
Loss = 8.9499e-02, PNorm = 83.9764, GNorm = 0.5089, lr_0 = 1.6856e-04
Loss = 8.7876e-02, PNorm = 83.9796, GNorm = 0.5852, lr_0 = 1.6845e-04
Loss = 8.3985e-02, PNorm = 83.9831, GNorm = 0.6148, lr_0 = 1.6833e-04
Loss = 7.9579e-02, PNorm = 83.9869, GNorm = 0.5693, lr_0 = 1.6821e-04
Loss = 7.4387e-02, PNorm = 83.9894, GNorm = 0.5896, lr_0 = 1.6810e-04
Loss = 8.2522e-02, PNorm = 83.9919, GNorm = 0.6404, lr_0 = 1.6798e-04
Loss = 8.2260e-02, PNorm = 83.9956, GNorm = 0.6096, lr_0 = 1.6787e-04
Loss = 8.1166e-02, PNorm = 83.9960, GNorm = 1.1602, lr_0 = 1.6775e-04
Loss = 8.5095e-02, PNorm = 83.9988, GNorm = 0.9748, lr_0 = 1.6764e-04
Loss = 7.1143e-02, PNorm = 83.9999, GNorm = 0.7601, lr_0 = 1.6752e-04
Loss = 8.4331e-02, PNorm = 84.0030, GNorm = 1.0843, lr_0 = 1.6741e-04
Loss = 8.5371e-02, PNorm = 84.0052, GNorm = 0.6066, lr_0 = 1.6729e-04
Loss = 8.8760e-02, PNorm = 84.0096, GNorm = 0.9830, lr_0 = 1.6718e-04
Loss = 8.2617e-02, PNorm = 84.0119, GNorm = 0.4605, lr_0 = 1.6707e-04
Loss = 7.9546e-02, PNorm = 84.0140, GNorm = 0.5758, lr_0 = 1.6695e-04
Loss = 9.5329e-02, PNorm = 84.0173, GNorm = 0.6564, lr_0 = 1.6684e-04
Loss = 9.3537e-02, PNorm = 84.0193, GNorm = 0.6570, lr_0 = 1.6672e-04
Loss = 9.5205e-02, PNorm = 84.0223, GNorm = 0.5607, lr_0 = 1.6661e-04
Loss = 9.2376e-02, PNorm = 84.0302, GNorm = 0.7293, lr_0 = 1.6649e-04
Loss = 8.7419e-02, PNorm = 84.0357, GNorm = 0.5377, lr_0 = 1.6638e-04
Loss = 9.9236e-02, PNorm = 84.0399, GNorm = 0.6754, lr_0 = 1.6627e-04
Loss = 1.0097e-01, PNorm = 84.0423, GNorm = 0.6030, lr_0 = 1.6615e-04
Loss = 8.0575e-02, PNorm = 84.0429, GNorm = 0.6010, lr_0 = 1.6604e-04
Loss = 8.9986e-02, PNorm = 84.0430, GNorm = 0.7471, lr_0 = 1.6592e-04
Loss = 9.2906e-02, PNorm = 84.0451, GNorm = 0.6815, lr_0 = 1.6581e-04
Loss = 7.8289e-02, PNorm = 84.0440, GNorm = 0.8569, lr_0 = 1.6570e-04
Loss = 7.9822e-02, PNorm = 84.0457, GNorm = 0.5957, lr_0 = 1.6558e-04
Loss = 8.0915e-02, PNorm = 84.0484, GNorm = 0.5225, lr_0 = 1.6547e-04
Loss = 9.0302e-02, PNorm = 84.0501, GNorm = 0.6054, lr_0 = 1.6536e-04
Loss = 8.6818e-02, PNorm = 84.0534, GNorm = 0.7579, lr_0 = 1.6524e-04
Loss = 8.7542e-02, PNorm = 84.0575, GNorm = 0.7532, lr_0 = 1.6513e-04
Loss = 8.9570e-02, PNorm = 84.0595, GNorm = 0.7748, lr_0 = 1.6502e-04
Loss = 9.2022e-02, PNorm = 84.0616, GNorm = 0.6499, lr_0 = 1.6490e-04
Loss = 8.9285e-02, PNorm = 84.0653, GNorm = 0.6887, lr_0 = 1.6479e-04
Loss = 9.7225e-02, PNorm = 84.0684, GNorm = 0.7855, lr_0 = 1.6468e-04
Loss = 8.5152e-02, PNorm = 84.0720, GNorm = 0.8156, lr_0 = 1.6457e-04
Loss = 8.3879e-02, PNorm = 84.0726, GNorm = 1.1738, lr_0 = 1.6445e-04
Loss = 8.0374e-02, PNorm = 84.0770, GNorm = 0.5653, lr_0 = 1.6434e-04
Loss = 8.5743e-02, PNorm = 84.0816, GNorm = 0.6731, lr_0 = 1.6423e-04
Loss = 7.8583e-02, PNorm = 84.0851, GNorm = 0.5091, lr_0 = 1.6412e-04
Loss = 8.7864e-02, PNorm = 84.0899, GNorm = 0.5827, lr_0 = 1.6400e-04
Loss = 9.3989e-02, PNorm = 84.0934, GNorm = 0.6415, lr_0 = 1.6389e-04
Loss = 9.5622e-02, PNorm = 84.0940, GNorm = 0.9124, lr_0 = 1.6378e-04
Validation mae = 0.228178
Epoch 24
Loss = 7.4104e-02, PNorm = 84.0935, GNorm = 0.6687, lr_0 = 1.6367e-04
Loss = 7.2851e-02, PNorm = 84.0943, GNorm = 0.5281, lr_0 = 1.6355e-04
Loss = 7.6417e-02, PNorm = 84.0962, GNorm = 0.6225, lr_0 = 1.6344e-04
Loss = 7.6614e-02, PNorm = 84.0984, GNorm = 0.6117, lr_0 = 1.6333e-04
Loss = 8.6366e-02, PNorm = 84.1006, GNorm = 0.8082, lr_0 = 1.6322e-04
Loss = 7.6555e-02, PNorm = 84.1076, GNorm = 0.6050, lr_0 = 1.6311e-04
Loss = 8.5089e-02, PNorm = 84.1096, GNorm = 0.9024, lr_0 = 1.6299e-04
Loss = 8.2894e-02, PNorm = 84.1092, GNorm = 0.5570, lr_0 = 1.6288e-04
Loss = 8.0959e-02, PNorm = 84.1108, GNorm = 0.5629, lr_0 = 1.6277e-04
Loss = 8.2450e-02, PNorm = 84.1136, GNorm = 0.5388, lr_0 = 1.6266e-04
Loss = 8.2137e-02, PNorm = 84.1163, GNorm = 0.9064, lr_0 = 1.6255e-04
Loss = 7.8449e-02, PNorm = 84.1184, GNorm = 0.5476, lr_0 = 1.6244e-04
Loss = 8.0817e-02, PNorm = 84.1198, GNorm = 0.6149, lr_0 = 1.6233e-04
Loss = 7.9169e-02, PNorm = 84.1230, GNorm = 0.5509, lr_0 = 1.6221e-04
Loss = 8.2480e-02, PNorm = 84.1260, GNorm = 0.4811, lr_0 = 1.6210e-04
Loss = 7.4153e-02, PNorm = 84.1292, GNorm = 0.5954, lr_0 = 1.6199e-04
Loss = 7.7635e-02, PNorm = 84.1326, GNorm = 0.4985, lr_0 = 1.6188e-04
Loss = 6.9077e-02, PNorm = 84.1334, GNorm = 0.5981, lr_0 = 1.6177e-04
Loss = 8.2504e-02, PNorm = 84.1360, GNorm = 0.6429, lr_0 = 1.6166e-04
Loss = 7.5878e-02, PNorm = 84.1387, GNorm = 0.5897, lr_0 = 1.6155e-04
Loss = 7.1789e-02, PNorm = 84.1406, GNorm = 0.5704, lr_0 = 1.6144e-04
Loss = 8.4016e-02, PNorm = 84.1429, GNorm = 0.5930, lr_0 = 1.6133e-04
Loss = 7.7279e-02, PNorm = 84.1478, GNorm = 0.6392, lr_0 = 1.6122e-04
Loss = 8.4749e-02, PNorm = 84.1504, GNorm = 0.9851, lr_0 = 1.6111e-04
Loss = 8.3302e-02, PNorm = 84.1504, GNorm = 0.5319, lr_0 = 1.6100e-04
Loss = 8.5065e-02, PNorm = 84.1526, GNorm = 0.7197, lr_0 = 1.6089e-04
Loss = 8.1770e-02, PNorm = 84.1548, GNorm = 0.6946, lr_0 = 1.6078e-04
Loss = 8.1321e-02, PNorm = 84.1580, GNorm = 0.8653, lr_0 = 1.6067e-04
Loss = 8.2475e-02, PNorm = 84.1622, GNorm = 0.5707, lr_0 = 1.6056e-04
Loss = 9.2913e-02, PNorm = 84.1662, GNorm = 0.7201, lr_0 = 1.6045e-04
Loss = 8.1721e-02, PNorm = 84.1692, GNorm = 0.6635, lr_0 = 1.6034e-04
Loss = 8.0325e-02, PNorm = 84.1703, GNorm = 0.9040, lr_0 = 1.6023e-04
Loss = 7.5655e-02, PNorm = 84.1726, GNorm = 0.8930, lr_0 = 1.6012e-04
Loss = 7.1880e-02, PNorm = 84.1744, GNorm = 0.6907, lr_0 = 1.6001e-04
Loss = 7.8963e-02, PNorm = 84.1759, GNorm = 0.6103, lr_0 = 1.5990e-04
Loss = 8.3809e-02, PNorm = 84.1775, GNorm = 1.0221, lr_0 = 1.5979e-04
Loss = 7.5628e-02, PNorm = 84.1809, GNorm = 0.6521, lr_0 = 1.5968e-04
Loss = 7.9860e-02, PNorm = 84.1837, GNorm = 0.6062, lr_0 = 1.5957e-04
Loss = 8.4258e-02, PNorm = 84.1877, GNorm = 0.6996, lr_0 = 1.5946e-04
Loss = 7.7363e-02, PNorm = 84.1900, GNorm = 0.7981, lr_0 = 1.5935e-04
Loss = 8.7925e-02, PNorm = 84.1920, GNorm = 0.8016, lr_0 = 1.5924e-04
Loss = 8.5865e-02, PNorm = 84.1950, GNorm = 0.6073, lr_0 = 1.5913e-04
Loss = 8.7613e-02, PNorm = 84.1974, GNorm = 0.6226, lr_0 = 1.5902e-04
Loss = 8.7138e-02, PNorm = 84.1998, GNorm = 0.7538, lr_0 = 1.5891e-04
Loss = 8.5428e-02, PNorm = 84.1994, GNorm = 0.5501, lr_0 = 1.5880e-04
Loss = 8.1843e-02, PNorm = 84.2009, GNorm = 0.6451, lr_0 = 1.5870e-04
Loss = 7.4851e-02, PNorm = 84.2021, GNorm = 0.6103, lr_0 = 1.5859e-04
Loss = 8.3244e-02, PNorm = 84.2042, GNorm = 0.5680, lr_0 = 1.5848e-04
Loss = 7.9438e-02, PNorm = 84.2066, GNorm = 0.6661, lr_0 = 1.5837e-04
Loss = 7.8886e-02, PNorm = 84.2096, GNorm = 0.6483, lr_0 = 1.5826e-04
Loss = 7.0879e-02, PNorm = 84.2117, GNorm = 0.6184, lr_0 = 1.5815e-04
Loss = 7.6451e-02, PNorm = 84.2139, GNorm = 0.3611, lr_0 = 1.5804e-04
Loss = 8.1965e-02, PNorm = 84.2176, GNorm = 0.6711, lr_0 = 1.5794e-04
Loss = 8.2338e-02, PNorm = 84.2206, GNorm = 0.6515, lr_0 = 1.5783e-04
Loss = 7.9345e-02, PNorm = 84.2225, GNorm = 0.4490, lr_0 = 1.5772e-04
Loss = 8.0042e-02, PNorm = 84.2246, GNorm = 0.3964, lr_0 = 1.5761e-04
Loss = 8.2211e-02, PNorm = 84.2261, GNorm = 0.5371, lr_0 = 1.5750e-04
Loss = 8.7696e-02, PNorm = 84.2296, GNorm = 0.4909, lr_0 = 1.5740e-04
Loss = 8.6863e-02, PNorm = 84.2328, GNorm = 0.5705, lr_0 = 1.5729e-04
Loss = 7.5381e-02, PNorm = 84.2358, GNorm = 0.6556, lr_0 = 1.5718e-04
Loss = 8.7405e-02, PNorm = 84.2375, GNorm = 0.5837, lr_0 = 1.5707e-04
Loss = 8.3114e-02, PNorm = 84.2391, GNorm = 0.5579, lr_0 = 1.5697e-04
Loss = 8.4245e-02, PNorm = 84.2421, GNorm = 0.7793, lr_0 = 1.5686e-04
Loss = 8.9138e-02, PNorm = 84.2435, GNorm = 0.9451, lr_0 = 1.5675e-04
Loss = 7.6900e-02, PNorm = 84.2443, GNorm = 0.6105, lr_0 = 1.5664e-04
Loss = 9.5991e-02, PNorm = 84.2479, GNorm = 1.2082, lr_0 = 1.5654e-04
Loss = 9.1551e-02, PNorm = 84.2498, GNorm = 0.6521, lr_0 = 1.5643e-04
Loss = 8.3883e-02, PNorm = 84.2535, GNorm = 0.6523, lr_0 = 1.5632e-04
Loss = 9.1689e-02, PNorm = 84.2548, GNorm = 1.1170, lr_0 = 1.5621e-04
Loss = 8.7825e-02, PNorm = 84.2571, GNorm = 1.0080, lr_0 = 1.5611e-04
Loss = 8.1027e-02, PNorm = 84.2600, GNorm = 0.9510, lr_0 = 1.5600e-04
Loss = 8.3291e-02, PNorm = 84.2631, GNorm = 0.5793, lr_0 = 1.5589e-04
Loss = 8.5412e-02, PNorm = 84.2674, GNorm = 0.4566, lr_0 = 1.5579e-04
Loss = 7.2189e-02, PNorm = 84.2708, GNorm = 0.5669, lr_0 = 1.5568e-04
Loss = 7.7852e-02, PNorm = 84.2737, GNorm = 0.5402, lr_0 = 1.5557e-04
Loss = 9.4219e-02, PNorm = 84.2770, GNorm = 0.6535, lr_0 = 1.5547e-04
Loss = 8.1051e-02, PNorm = 84.2793, GNorm = 0.6456, lr_0 = 1.5536e-04
Loss = 8.9573e-02, PNorm = 84.2811, GNorm = 1.0023, lr_0 = 1.5525e-04
Loss = 7.5209e-02, PNorm = 84.2832, GNorm = 0.6198, lr_0 = 1.5515e-04
Loss = 9.3069e-02, PNorm = 84.2832, GNorm = 0.6725, lr_0 = 1.5504e-04
Loss = 8.7514e-02, PNorm = 84.2865, GNorm = 0.9443, lr_0 = 1.5493e-04
Loss = 7.1140e-02, PNorm = 84.2906, GNorm = 0.7275, lr_0 = 1.5483e-04
Loss = 7.8001e-02, PNorm = 84.2926, GNorm = 0.6391, lr_0 = 1.5472e-04
Loss = 7.8568e-02, PNorm = 84.2964, GNorm = 0.6622, lr_0 = 1.5462e-04
Loss = 8.1691e-02, PNorm = 84.2987, GNorm = 0.7585, lr_0 = 1.5451e-04
Loss = 7.9708e-02, PNorm = 84.3006, GNorm = 0.8634, lr_0 = 1.5440e-04
Loss = 7.8525e-02, PNorm = 84.3013, GNorm = 0.7072, lr_0 = 1.5430e-04
Loss = 7.5340e-02, PNorm = 84.3035, GNorm = 0.7004, lr_0 = 1.5419e-04
Loss = 7.2565e-02, PNorm = 84.3052, GNorm = 0.5755, lr_0 = 1.5409e-04
Loss = 8.1707e-02, PNorm = 84.3071, GNorm = 0.5760, lr_0 = 1.5398e-04
Loss = 8.6318e-02, PNorm = 84.3082, GNorm = 0.7309, lr_0 = 1.5388e-04
Loss = 8.2794e-02, PNorm = 84.3103, GNorm = 0.7862, lr_0 = 1.5377e-04
Loss = 8.2944e-02, PNorm = 84.3130, GNorm = 0.6024, lr_0 = 1.5367e-04
Loss = 8.4431e-02, PNorm = 84.3156, GNorm = 0.5943, lr_0 = 1.5356e-04
Loss = 7.9383e-02, PNorm = 84.3197, GNorm = 0.6169, lr_0 = 1.5346e-04
Loss = 8.0664e-02, PNorm = 84.3244, GNorm = 0.6213, lr_0 = 1.5335e-04
Loss = 9.8062e-02, PNorm = 84.3277, GNorm = 0.5360, lr_0 = 1.5325e-04
Loss = 9.1346e-02, PNorm = 84.3296, GNorm = 0.8156, lr_0 = 1.5314e-04
Loss = 8.7424e-02, PNorm = 84.3318, GNorm = 0.8740, lr_0 = 1.5304e-04
Loss = 9.9213e-02, PNorm = 84.3369, GNorm = 0.5843, lr_0 = 1.5293e-04
Loss = 8.0339e-02, PNorm = 84.3405, GNorm = 0.6592, lr_0 = 1.5283e-04
Loss = 7.7443e-02, PNorm = 84.3413, GNorm = 0.4566, lr_0 = 1.5272e-04
Loss = 8.4913e-02, PNorm = 84.3424, GNorm = 0.5911, lr_0 = 1.5262e-04
Loss = 8.6490e-02, PNorm = 84.3425, GNorm = 0.5625, lr_0 = 1.5251e-04
Loss = 8.9124e-02, PNorm = 84.3454, GNorm = 0.6428, lr_0 = 1.5241e-04
Loss = 8.8405e-02, PNorm = 84.3495, GNorm = 0.6515, lr_0 = 1.5230e-04
Loss = 8.6467e-02, PNorm = 84.3523, GNorm = 0.5117, lr_0 = 1.5220e-04
Loss = 8.0189e-02, PNorm = 84.3530, GNorm = 0.5605, lr_0 = 1.5209e-04
Loss = 8.2924e-02, PNorm = 84.3541, GNorm = 0.5596, lr_0 = 1.5199e-04
Loss = 9.3902e-02, PNorm = 84.3565, GNorm = 0.6138, lr_0 = 1.5189e-04
Loss = 7.7562e-02, PNorm = 84.3587, GNorm = 0.4772, lr_0 = 1.5178e-04
Loss = 7.6734e-02, PNorm = 84.3608, GNorm = 0.5742, lr_0 = 1.5168e-04
Loss = 7.8310e-02, PNorm = 84.3648, GNorm = 0.7197, lr_0 = 1.5157e-04
Loss = 9.5385e-02, PNorm = 84.3641, GNorm = 0.5353, lr_0 = 1.5147e-04
Loss = 8.6281e-02, PNorm = 84.3684, GNorm = 0.6484, lr_0 = 1.5137e-04
Loss = 1.0123e-01, PNorm = 84.3719, GNorm = 0.5516, lr_0 = 1.5126e-04
Loss = 8.0135e-02, PNorm = 84.3745, GNorm = 0.5591, lr_0 = 1.5116e-04
Loss = 8.2579e-02, PNorm = 84.3775, GNorm = 0.9332, lr_0 = 1.5106e-04
Loss = 8.7107e-02, PNorm = 84.3811, GNorm = 0.8771, lr_0 = 1.5095e-04
Loss = 9.4552e-02, PNorm = 84.3825, GNorm = 0.7289, lr_0 = 1.5085e-04
Validation mae = 0.226981
Epoch 25
Loss = 8.6969e-02, PNorm = 84.3848, GNorm = 0.8032, lr_0 = 1.5075e-04
Loss = 7.1322e-02, PNorm = 84.3888, GNorm = 0.6447, lr_0 = 1.5064e-04
Loss = 7.0785e-02, PNorm = 84.3921, GNorm = 0.4702, lr_0 = 1.5054e-04
Loss = 7.7111e-02, PNorm = 84.3955, GNorm = 0.5344, lr_0 = 1.5044e-04
Loss = 8.0472e-02, PNorm = 84.3946, GNorm = 0.5143, lr_0 = 1.5033e-04
Loss = 7.3670e-02, PNorm = 84.3965, GNorm = 0.6509, lr_0 = 1.5023e-04
Loss = 7.4727e-02, PNorm = 84.3987, GNorm = 0.5655, lr_0 = 1.5013e-04
Loss = 8.5573e-02, PNorm = 84.4037, GNorm = 0.7117, lr_0 = 1.5002e-04
Loss = 8.3936e-02, PNorm = 84.4087, GNorm = 0.7249, lr_0 = 1.4992e-04
Loss = 7.7796e-02, PNorm = 84.4113, GNorm = 0.5953, lr_0 = 1.4982e-04
Loss = 7.1851e-02, PNorm = 84.4132, GNorm = 0.6280, lr_0 = 1.4972e-04
Loss = 6.9058e-02, PNorm = 84.4173, GNorm = 0.6450, lr_0 = 1.4961e-04
Loss = 8.3896e-02, PNorm = 84.4209, GNorm = 0.5722, lr_0 = 1.4951e-04
Loss = 8.6441e-02, PNorm = 84.4229, GNorm = 0.7994, lr_0 = 1.4941e-04
Loss = 7.3891e-02, PNorm = 84.4244, GNorm = 0.8118, lr_0 = 1.4931e-04
Loss = 8.6676e-02, PNorm = 84.4268, GNorm = 0.9207, lr_0 = 1.4920e-04
Loss = 8.7536e-02, PNorm = 84.4284, GNorm = 0.7236, lr_0 = 1.4910e-04
Loss = 7.1980e-02, PNorm = 84.4296, GNorm = 0.7235, lr_0 = 1.4900e-04
Loss = 9.2180e-02, PNorm = 84.4331, GNorm = 0.5524, lr_0 = 1.4890e-04
Loss = 8.4844e-02, PNorm = 84.4362, GNorm = 0.6744, lr_0 = 1.4880e-04
Loss = 8.7755e-02, PNorm = 84.4374, GNorm = 0.5539, lr_0 = 1.4869e-04
Loss = 7.7937e-02, PNorm = 84.4391, GNorm = 0.6302, lr_0 = 1.4859e-04
Loss = 7.4666e-02, PNorm = 84.4403, GNorm = 0.7729, lr_0 = 1.4849e-04
Loss = 8.3058e-02, PNorm = 84.4405, GNorm = 0.7170, lr_0 = 1.4839e-04
Loss = 8.1288e-02, PNorm = 84.4420, GNorm = 0.4903, lr_0 = 1.4829e-04
Loss = 8.0441e-02, PNorm = 84.4446, GNorm = 0.6027, lr_0 = 1.4818e-04
Loss = 7.4731e-02, PNorm = 84.4475, GNorm = 0.4850, lr_0 = 1.4808e-04
Loss = 7.3853e-02, PNorm = 84.4514, GNorm = 0.6148, lr_0 = 1.4798e-04
Loss = 8.3002e-02, PNorm = 84.4533, GNorm = 0.9498, lr_0 = 1.4788e-04
Loss = 8.6458e-02, PNorm = 84.4557, GNorm = 0.8607, lr_0 = 1.4778e-04
Loss = 7.3833e-02, PNorm = 84.4577, GNorm = 0.6705, lr_0 = 1.4768e-04
Loss = 7.4919e-02, PNorm = 84.4597, GNorm = 0.6662, lr_0 = 1.4758e-04
Loss = 7.7888e-02, PNorm = 84.4619, GNorm = 0.6776, lr_0 = 1.4748e-04
Loss = 8.5534e-02, PNorm = 84.4628, GNorm = 0.6712, lr_0 = 1.4737e-04
Loss = 8.2125e-02, PNorm = 84.4649, GNorm = 0.6646, lr_0 = 1.4727e-04
Loss = 7.0739e-02, PNorm = 84.4656, GNorm = 0.7123, lr_0 = 1.4717e-04
Loss = 7.6042e-02, PNorm = 84.4682, GNorm = 0.5784, lr_0 = 1.4707e-04
Loss = 7.4658e-02, PNorm = 84.4708, GNorm = 0.6366, lr_0 = 1.4697e-04
Loss = 7.8587e-02, PNorm = 84.4734, GNorm = 0.5344, lr_0 = 1.4687e-04
Loss = 7.8374e-02, PNorm = 84.4769, GNorm = 0.5642, lr_0 = 1.4677e-04
Loss = 8.4869e-02, PNorm = 84.4791, GNorm = 0.6987, lr_0 = 1.4667e-04
Loss = 7.8716e-02, PNorm = 84.4798, GNorm = 0.6016, lr_0 = 1.4657e-04
Loss = 7.9282e-02, PNorm = 84.4832, GNorm = 0.4932, lr_0 = 1.4647e-04
Loss = 7.8918e-02, PNorm = 84.4853, GNorm = 0.5135, lr_0 = 1.4637e-04
Loss = 7.7730e-02, PNorm = 84.4865, GNorm = 0.5996, lr_0 = 1.4627e-04
Loss = 7.1773e-02, PNorm = 84.4872, GNorm = 0.7217, lr_0 = 1.4617e-04
Loss = 7.9992e-02, PNorm = 84.4898, GNorm = 0.6487, lr_0 = 1.4607e-04
Loss = 9.7334e-02, PNorm = 84.4915, GNorm = 0.7250, lr_0 = 1.4597e-04
Loss = 7.8148e-02, PNorm = 84.4929, GNorm = 0.6255, lr_0 = 1.4587e-04
Loss = 9.2347e-02, PNorm = 84.4964, GNorm = 0.6742, lr_0 = 1.4577e-04
Loss = 7.9572e-02, PNorm = 84.4996, GNorm = 0.5323, lr_0 = 1.4567e-04
Loss = 8.2701e-02, PNorm = 84.5043, GNorm = 0.5523, lr_0 = 1.4557e-04
Loss = 8.6221e-02, PNorm = 84.5072, GNorm = 0.6327, lr_0 = 1.4547e-04
Loss = 7.3412e-02, PNorm = 84.5103, GNorm = 0.5935, lr_0 = 1.4537e-04
Loss = 7.8332e-02, PNorm = 84.5131, GNorm = 0.5805, lr_0 = 1.4527e-04
Loss = 7.3643e-02, PNorm = 84.5177, GNorm = 0.7461, lr_0 = 1.4517e-04
Loss = 8.2419e-02, PNorm = 84.5170, GNorm = 0.8866, lr_0 = 1.4507e-04
Loss = 8.6364e-02, PNorm = 84.5162, GNorm = 0.6012, lr_0 = 1.4497e-04
Loss = 7.2191e-02, PNorm = 84.5183, GNorm = 0.5909, lr_0 = 1.4487e-04
Loss = 8.9363e-02, PNorm = 84.5206, GNorm = 0.7148, lr_0 = 1.4477e-04
Loss = 8.1435e-02, PNorm = 84.5250, GNorm = 0.6078, lr_0 = 1.4467e-04
Loss = 6.7563e-02, PNorm = 84.5276, GNorm = 0.5827, lr_0 = 1.4457e-04
Loss = 6.7448e-02, PNorm = 84.5289, GNorm = 0.6733, lr_0 = 1.4447e-04
Loss = 8.1742e-02, PNorm = 84.5314, GNorm = 0.5529, lr_0 = 1.4438e-04
Loss = 8.6591e-02, PNorm = 84.5311, GNorm = 0.6623, lr_0 = 1.4428e-04
Loss = 8.0217e-02, PNorm = 84.5324, GNorm = 0.6092, lr_0 = 1.4418e-04
Loss = 7.3387e-02, PNorm = 84.5359, GNorm = 0.6222, lr_0 = 1.4408e-04
Loss = 7.5303e-02, PNorm = 84.5393, GNorm = 0.7138, lr_0 = 1.4398e-04
Loss = 8.5430e-02, PNorm = 84.5422, GNorm = 0.7816, lr_0 = 1.4388e-04
Loss = 8.9384e-02, PNorm = 84.5449, GNorm = 0.6270, lr_0 = 1.4378e-04
Loss = 7.8222e-02, PNorm = 84.5473, GNorm = 0.6099, lr_0 = 1.4368e-04
Loss = 7.6458e-02, PNorm = 84.5498, GNorm = 0.7206, lr_0 = 1.4359e-04
Loss = 7.4956e-02, PNorm = 84.5531, GNorm = 0.7102, lr_0 = 1.4349e-04
Loss = 8.2616e-02, PNorm = 84.5547, GNorm = 0.7105, lr_0 = 1.4339e-04
Loss = 7.8256e-02, PNorm = 84.5565, GNorm = 0.6297, lr_0 = 1.4329e-04
Loss = 7.6660e-02, PNorm = 84.5609, GNorm = 0.7086, lr_0 = 1.4319e-04
Loss = 8.7063e-02, PNorm = 84.5632, GNorm = 0.6368, lr_0 = 1.4310e-04
Loss = 6.6791e-02, PNorm = 84.5658, GNorm = 0.6278, lr_0 = 1.4300e-04
Loss = 8.6201e-02, PNorm = 84.5697, GNorm = 0.6643, lr_0 = 1.4290e-04
Loss = 8.6410e-02, PNorm = 84.5710, GNorm = 0.7038, lr_0 = 1.4280e-04
Loss = 8.4070e-02, PNorm = 84.5734, GNorm = 0.5565, lr_0 = 1.4270e-04
Loss = 8.5754e-02, PNorm = 84.5759, GNorm = 0.6260, lr_0 = 1.4261e-04
Loss = 8.2076e-02, PNorm = 84.5784, GNorm = 0.7463, lr_0 = 1.4251e-04
Loss = 9.0381e-02, PNorm = 84.5796, GNorm = 0.5527, lr_0 = 1.4241e-04
Loss = 7.8575e-02, PNorm = 84.5819, GNorm = 1.0543, lr_0 = 1.4231e-04
Loss = 8.1422e-02, PNorm = 84.5842, GNorm = 0.5955, lr_0 = 1.4222e-04
Loss = 7.6545e-02, PNorm = 84.5856, GNorm = 0.8318, lr_0 = 1.4212e-04
Loss = 8.4454e-02, PNorm = 84.5860, GNorm = 0.6992, lr_0 = 1.4202e-04
Loss = 8.1657e-02, PNorm = 84.5877, GNorm = 0.7251, lr_0 = 1.4192e-04
Loss = 7.4141e-02, PNorm = 84.5912, GNorm = 0.5426, lr_0 = 1.4183e-04
Loss = 7.5273e-02, PNorm = 84.5949, GNorm = 0.4628, lr_0 = 1.4173e-04
Loss = 8.3315e-02, PNorm = 84.5971, GNorm = 0.5994, lr_0 = 1.4163e-04
Loss = 7.6448e-02, PNorm = 84.5998, GNorm = 0.5969, lr_0 = 1.4153e-04
Loss = 7.8168e-02, PNorm = 84.6017, GNorm = 0.6521, lr_0 = 1.4144e-04
Loss = 7.9152e-02, PNorm = 84.6031, GNorm = 0.6899, lr_0 = 1.4134e-04
Loss = 7.5849e-02, PNorm = 84.6059, GNorm = 0.5369, lr_0 = 1.4124e-04
Loss = 8.4779e-02, PNorm = 84.6081, GNorm = 0.6397, lr_0 = 1.4115e-04
Loss = 8.2419e-02, PNorm = 84.6102, GNorm = 0.4759, lr_0 = 1.4105e-04
Loss = 7.9848e-02, PNorm = 84.6115, GNorm = 0.7477, lr_0 = 1.4095e-04
Loss = 7.6129e-02, PNorm = 84.6135, GNorm = 0.7143, lr_0 = 1.4086e-04
Loss = 7.3475e-02, PNorm = 84.6149, GNorm = 0.7576, lr_0 = 1.4076e-04
Loss = 7.5681e-02, PNorm = 84.6159, GNorm = 0.9364, lr_0 = 1.4066e-04
Loss = 9.2732e-02, PNorm = 84.6179, GNorm = 0.6674, lr_0 = 1.4057e-04
Loss = 8.8065e-02, PNorm = 84.6207, GNorm = 0.7912, lr_0 = 1.4047e-04
Loss = 7.8444e-02, PNorm = 84.6229, GNorm = 0.5416, lr_0 = 1.4038e-04
Loss = 8.6727e-02, PNorm = 84.6248, GNorm = 0.7204, lr_0 = 1.4028e-04
Loss = 8.0971e-02, PNorm = 84.6256, GNorm = 0.7347, lr_0 = 1.4018e-04
Loss = 7.6154e-02, PNorm = 84.6267, GNorm = 0.8529, lr_0 = 1.4009e-04
Loss = 8.6726e-02, PNorm = 84.6291, GNorm = 0.6430, lr_0 = 1.3999e-04
Loss = 7.5574e-02, PNorm = 84.6322, GNorm = 0.5744, lr_0 = 1.3990e-04
Loss = 9.0100e-02, PNorm = 84.6339, GNorm = 0.9423, lr_0 = 1.3980e-04
Loss = 7.9185e-02, PNorm = 84.6353, GNorm = 0.6534, lr_0 = 1.3970e-04
Loss = 7.9676e-02, PNorm = 84.6381, GNorm = 0.6951, lr_0 = 1.3961e-04
Loss = 8.7545e-02, PNorm = 84.6409, GNorm = 0.6081, lr_0 = 1.3951e-04
Loss = 8.2551e-02, PNorm = 84.6448, GNorm = 0.7516, lr_0 = 1.3942e-04
Loss = 7.5765e-02, PNorm = 84.6466, GNorm = 0.5863, lr_0 = 1.3932e-04
Loss = 8.8388e-02, PNorm = 84.6476, GNorm = 0.7840, lr_0 = 1.3923e-04
Loss = 8.3757e-02, PNorm = 84.6509, GNorm = 0.6453, lr_0 = 1.3913e-04
Loss = 7.6725e-02, PNorm = 84.6527, GNorm = 0.8944, lr_0 = 1.3904e-04
Loss = 7.4222e-02, PNorm = 84.6516, GNorm = 0.5969, lr_0 = 1.3894e-04
Validation mae = 0.228240
Epoch 26
Loss = 8.0867e-02, PNorm = 84.6555, GNorm = 0.5912, lr_0 = 1.3884e-04
Loss = 7.3691e-02, PNorm = 84.6596, GNorm = 0.5618, lr_0 = 1.3875e-04
Loss = 7.7542e-02, PNorm = 84.6615, GNorm = 0.7724, lr_0 = 1.3865e-04
Loss = 6.7366e-02, PNorm = 84.6643, GNorm = 0.5305, lr_0 = 1.3856e-04
Loss = 8.0413e-02, PNorm = 84.6648, GNorm = 0.7886, lr_0 = 1.3846e-04
Loss = 7.9900e-02, PNorm = 84.6680, GNorm = 0.5961, lr_0 = 1.3837e-04
Loss = 8.4432e-02, PNorm = 84.6710, GNorm = 0.5983, lr_0 = 1.3828e-04
Loss = 7.2554e-02, PNorm = 84.6732, GNorm = 0.5979, lr_0 = 1.3818e-04
Loss = 6.5291e-02, PNorm = 84.6765, GNorm = 0.5356, lr_0 = 1.3809e-04
Loss = 7.1468e-02, PNorm = 84.6783, GNorm = 0.6582, lr_0 = 1.3799e-04
Loss = 7.5830e-02, PNorm = 84.6804, GNorm = 0.5693, lr_0 = 1.3790e-04
Loss = 7.5279e-02, PNorm = 84.6818, GNorm = 0.6700, lr_0 = 1.3780e-04
Loss = 6.7571e-02, PNorm = 84.6820, GNorm = 0.6212, lr_0 = 1.3771e-04
Loss = 7.1139e-02, PNorm = 84.6840, GNorm = 0.5600, lr_0 = 1.3761e-04
Loss = 7.6451e-02, PNorm = 84.6864, GNorm = 0.5338, lr_0 = 1.3752e-04
Loss = 6.9634e-02, PNorm = 84.6891, GNorm = 0.6065, lr_0 = 1.3742e-04
Loss = 7.9268e-02, PNorm = 84.6912, GNorm = 0.5708, lr_0 = 1.3733e-04
Loss = 8.2328e-02, PNorm = 84.6926, GNorm = 0.7171, lr_0 = 1.3724e-04
Loss = 8.4445e-02, PNorm = 84.6940, GNorm = 0.8614, lr_0 = 1.3714e-04
Loss = 6.8223e-02, PNorm = 84.6957, GNorm = 0.7422, lr_0 = 1.3705e-04
Loss = 8.0367e-02, PNorm = 84.6982, GNorm = 0.7794, lr_0 = 1.3695e-04
Loss = 8.9890e-02, PNorm = 84.7009, GNorm = 0.7402, lr_0 = 1.3686e-04
Loss = 8.0317e-02, PNorm = 84.7042, GNorm = 0.6759, lr_0 = 1.3677e-04
Loss = 7.5768e-02, PNorm = 84.7082, GNorm = 0.7335, lr_0 = 1.3667e-04
Loss = 7.6936e-02, PNorm = 84.7100, GNorm = 0.8726, lr_0 = 1.3658e-04
Loss = 8.0087e-02, PNorm = 84.7105, GNorm = 0.5106, lr_0 = 1.3649e-04
Loss = 8.6106e-02, PNorm = 84.7135, GNorm = 0.6740, lr_0 = 1.3639e-04
Loss = 7.5124e-02, PNorm = 84.7169, GNorm = 0.3817, lr_0 = 1.3630e-04
Loss = 7.5521e-02, PNorm = 84.7201, GNorm = 0.5182, lr_0 = 1.3621e-04
Loss = 8.3867e-02, PNorm = 84.7246, GNorm = 0.5553, lr_0 = 1.3611e-04
Loss = 6.9777e-02, PNorm = 84.7272, GNorm = 0.6132, lr_0 = 1.3602e-04
Loss = 7.4512e-02, PNorm = 84.7278, GNorm = 0.7235, lr_0 = 1.3593e-04
Loss = 8.1688e-02, PNorm = 84.7316, GNorm = 0.6082, lr_0 = 1.3583e-04
Loss = 8.2434e-02, PNorm = 84.7348, GNorm = 0.6655, lr_0 = 1.3574e-04
Loss = 7.2469e-02, PNorm = 84.7365, GNorm = 0.6057, lr_0 = 1.3565e-04
Loss = 8.2733e-02, PNorm = 84.7392, GNorm = 0.6812, lr_0 = 1.3555e-04
Loss = 7.1493e-02, PNorm = 84.7405, GNorm = 0.6740, lr_0 = 1.3546e-04
Loss = 7.3706e-02, PNorm = 84.7426, GNorm = 0.6348, lr_0 = 1.3537e-04
Loss = 7.5542e-02, PNorm = 84.7445, GNorm = 0.6763, lr_0 = 1.3528e-04
Loss = 7.8033e-02, PNorm = 84.7458, GNorm = 1.0608, lr_0 = 1.3518e-04
Loss = 7.7522e-02, PNorm = 84.7458, GNorm = 0.9678, lr_0 = 1.3509e-04
Loss = 8.0495e-02, PNorm = 84.7475, GNorm = 0.8290, lr_0 = 1.3500e-04
Loss = 7.9454e-02, PNorm = 84.7501, GNorm = 0.7661, lr_0 = 1.3491e-04
Loss = 7.7881e-02, PNorm = 84.7505, GNorm = 0.5647, lr_0 = 1.3481e-04
Loss = 8.0482e-02, PNorm = 84.7533, GNorm = 0.6198, lr_0 = 1.3472e-04
Loss = 7.8688e-02, PNorm = 84.7555, GNorm = 0.7213, lr_0 = 1.3463e-04
Loss = 7.6876e-02, PNorm = 84.7579, GNorm = 0.6402, lr_0 = 1.3454e-04
Loss = 8.3668e-02, PNorm = 84.7608, GNorm = 0.5962, lr_0 = 1.3444e-04
Loss = 8.4455e-02, PNorm = 84.7639, GNorm = 0.6853, lr_0 = 1.3435e-04
Loss = 7.6082e-02, PNorm = 84.7651, GNorm = 0.6883, lr_0 = 1.3426e-04
Loss = 6.8748e-02, PNorm = 84.7654, GNorm = 0.8995, lr_0 = 1.3417e-04
Loss = 7.2906e-02, PNorm = 84.7671, GNorm = 0.9460, lr_0 = 1.3408e-04
Loss = 7.2212e-02, PNorm = 84.7724, GNorm = 0.6176, lr_0 = 1.3398e-04
Loss = 7.6404e-02, PNorm = 84.7759, GNorm = 0.6953, lr_0 = 1.3389e-04
Loss = 7.4121e-02, PNorm = 84.7785, GNorm = 0.6722, lr_0 = 1.3380e-04
Loss = 6.8128e-02, PNorm = 84.7789, GNorm = 0.6703, lr_0 = 1.3371e-04
Loss = 7.6376e-02, PNorm = 84.7813, GNorm = 0.5858, lr_0 = 1.3362e-04
Loss = 7.6157e-02, PNorm = 84.7840, GNorm = 0.5610, lr_0 = 1.3353e-04
Loss = 8.9141e-02, PNorm = 84.7882, GNorm = 0.6578, lr_0 = 1.3343e-04
Loss = 7.6778e-02, PNorm = 84.7901, GNorm = 0.7796, lr_0 = 1.3334e-04
Loss = 8.2333e-02, PNorm = 84.7914, GNorm = 0.6592, lr_0 = 1.3325e-04
Loss = 7.6253e-02, PNorm = 84.7934, GNorm = 0.7807, lr_0 = 1.3316e-04
Loss = 7.2734e-02, PNorm = 84.7947, GNorm = 1.1966, lr_0 = 1.3307e-04
Loss = 6.9576e-02, PNorm = 84.7956, GNorm = 0.6979, lr_0 = 1.3298e-04
Loss = 8.3071e-02, PNorm = 84.7987, GNorm = 0.6193, lr_0 = 1.3289e-04
Loss = 7.3552e-02, PNorm = 84.8023, GNorm = 0.8907, lr_0 = 1.3280e-04
Loss = 6.7739e-02, PNorm = 84.8041, GNorm = 0.7996, lr_0 = 1.3270e-04
Loss = 8.0597e-02, PNorm = 84.8059, GNorm = 0.6099, lr_0 = 1.3261e-04
Loss = 7.5288e-02, PNorm = 84.8078, GNorm = 0.5818, lr_0 = 1.3252e-04
Loss = 8.4501e-02, PNorm = 84.8100, GNorm = 0.4750, lr_0 = 1.3243e-04
Loss = 7.3850e-02, PNorm = 84.8110, GNorm = 0.7222, lr_0 = 1.3234e-04
Loss = 7.1859e-02, PNorm = 84.8134, GNorm = 0.5804, lr_0 = 1.3225e-04
Loss = 9.1869e-02, PNorm = 84.8142, GNorm = 0.5677, lr_0 = 1.3216e-04
Loss = 8.1993e-02, PNorm = 84.8176, GNorm = 0.7926, lr_0 = 1.3207e-04
Loss = 7.9514e-02, PNorm = 84.8200, GNorm = 0.5797, lr_0 = 1.3198e-04
Loss = 8.1682e-02, PNorm = 84.8216, GNorm = 0.4752, lr_0 = 1.3189e-04
Loss = 7.8717e-02, PNorm = 84.8231, GNorm = 0.4964, lr_0 = 1.3180e-04
Loss = 8.5312e-02, PNorm = 84.8252, GNorm = 0.8299, lr_0 = 1.3171e-04
Loss = 9.7277e-02, PNorm = 84.8275, GNorm = 1.1240, lr_0 = 1.3162e-04
Loss = 7.2372e-02, PNorm = 84.8315, GNorm = 0.5400, lr_0 = 1.3153e-04
Loss = 7.1167e-02, PNorm = 84.8344, GNorm = 0.6758, lr_0 = 1.3144e-04
Loss = 8.4616e-02, PNorm = 84.8356, GNorm = 0.8503, lr_0 = 1.3135e-04
Loss = 8.5153e-02, PNorm = 84.8375, GNorm = 0.5437, lr_0 = 1.3126e-04
Loss = 7.9858e-02, PNorm = 84.8412, GNorm = 0.5891, lr_0 = 1.3117e-04
Loss = 6.9408e-02, PNorm = 84.8445, GNorm = 0.4987, lr_0 = 1.3108e-04
Loss = 7.7282e-02, PNorm = 84.8446, GNorm = 0.4808, lr_0 = 1.3099e-04
Loss = 7.6892e-02, PNorm = 84.8459, GNorm = 0.6260, lr_0 = 1.3090e-04
Loss = 8.2222e-02, PNorm = 84.8471, GNorm = 0.4669, lr_0 = 1.3081e-04
Loss = 7.9065e-02, PNorm = 84.8477, GNorm = 0.8058, lr_0 = 1.3072e-04
Loss = 7.4189e-02, PNorm = 84.8492, GNorm = 0.6647, lr_0 = 1.3063e-04
Loss = 7.7568e-02, PNorm = 84.8512, GNorm = 0.9069, lr_0 = 1.3054e-04
Loss = 7.9816e-02, PNorm = 84.8539, GNorm = 0.6583, lr_0 = 1.3045e-04
Loss = 8.0763e-02, PNorm = 84.8546, GNorm = 0.6422, lr_0 = 1.3036e-04
Loss = 7.4865e-02, PNorm = 84.8556, GNorm = 0.7351, lr_0 = 1.3027e-04
Loss = 7.9084e-02, PNorm = 84.8575, GNorm = 0.7303, lr_0 = 1.3018e-04
Loss = 7.8843e-02, PNorm = 84.8597, GNorm = 0.7569, lr_0 = 1.3009e-04
Loss = 7.4982e-02, PNorm = 84.8604, GNorm = 0.6845, lr_0 = 1.3000e-04
Loss = 7.3293e-02, PNorm = 84.8624, GNorm = 0.6220, lr_0 = 1.2992e-04
Loss = 7.7592e-02, PNorm = 84.8645, GNorm = 0.9838, lr_0 = 1.2983e-04
Loss = 8.1138e-02, PNorm = 84.8664, GNorm = 0.6373, lr_0 = 1.2974e-04
Loss = 1.0210e-01, PNorm = 84.8696, GNorm = 0.6720, lr_0 = 1.2965e-04
Loss = 8.3669e-02, PNorm = 84.8720, GNorm = 0.5804, lr_0 = 1.2956e-04
Loss = 8.0955e-02, PNorm = 84.8734, GNorm = 0.6014, lr_0 = 1.2947e-04
Loss = 8.4896e-02, PNorm = 84.8761, GNorm = 0.9995, lr_0 = 1.2938e-04
Loss = 7.4471e-02, PNorm = 84.8785, GNorm = 0.6665, lr_0 = 1.2929e-04
Loss = 7.3665e-02, PNorm = 84.8807, GNorm = 0.7579, lr_0 = 1.2921e-04
Loss = 7.6603e-02, PNorm = 84.8829, GNorm = 0.5936, lr_0 = 1.2912e-04
Loss = 8.0954e-02, PNorm = 84.8855, GNorm = 0.7760, lr_0 = 1.2903e-04
Loss = 7.5011e-02, PNorm = 84.8859, GNorm = 0.5244, lr_0 = 1.2894e-04
Loss = 8.5955e-02, PNorm = 84.8873, GNorm = 0.6683, lr_0 = 1.2885e-04
Loss = 8.1571e-02, PNorm = 84.8886, GNorm = 1.1932, lr_0 = 1.2876e-04
Loss = 7.8118e-02, PNorm = 84.8896, GNorm = 0.6149, lr_0 = 1.2867e-04
Loss = 8.3896e-02, PNorm = 84.8916, GNorm = 0.6031, lr_0 = 1.2859e-04
Loss = 8.0859e-02, PNorm = 84.8961, GNorm = 0.7121, lr_0 = 1.2850e-04
Loss = 8.3124e-02, PNorm = 84.8975, GNorm = 0.6906, lr_0 = 1.2841e-04
Loss = 7.5842e-02, PNorm = 84.8985, GNorm = 0.6052, lr_0 = 1.2832e-04
Loss = 7.6657e-02, PNorm = 84.8987, GNorm = 0.8212, lr_0 = 1.2823e-04
Loss = 8.2436e-02, PNorm = 84.8999, GNorm = 0.7121, lr_0 = 1.2815e-04
Loss = 8.9499e-02, PNorm = 84.9033, GNorm = 0.7738, lr_0 = 1.2806e-04
Loss = 8.5612e-02, PNorm = 84.9041, GNorm = 0.7120, lr_0 = 1.2797e-04
Validation mae = 0.228483
Epoch 27
Loss = 7.4928e-02, PNorm = 84.9024, GNorm = 0.9922, lr_0 = 1.2788e-04
Loss = 8.0859e-02, PNorm = 84.9025, GNorm = 0.5447, lr_0 = 1.2780e-04
Loss = 6.8993e-02, PNorm = 84.9044, GNorm = 0.5572, lr_0 = 1.2771e-04
Loss = 7.4803e-02, PNorm = 84.9071, GNorm = 0.7546, lr_0 = 1.2762e-04
Loss = 8.2807e-02, PNorm = 84.9100, GNorm = 0.7169, lr_0 = 1.2753e-04
Loss = 7.0828e-02, PNorm = 84.9126, GNorm = 0.7719, lr_0 = 1.2745e-04
Loss = 8.4637e-02, PNorm = 84.9156, GNorm = 0.6338, lr_0 = 1.2736e-04
Loss = 8.4227e-02, PNorm = 84.9189, GNorm = 0.9387, lr_0 = 1.2727e-04
Loss = 8.0936e-02, PNorm = 84.9215, GNorm = 0.8998, lr_0 = 1.2718e-04
Loss = 7.5947e-02, PNorm = 84.9241, GNorm = 0.5287, lr_0 = 1.2710e-04
Loss = 7.4154e-02, PNorm = 84.9261, GNorm = 0.4891, lr_0 = 1.2701e-04
Loss = 6.6128e-02, PNorm = 84.9288, GNorm = 0.8222, lr_0 = 1.2692e-04
Loss = 7.4423e-02, PNorm = 84.9309, GNorm = 0.7198, lr_0 = 1.2684e-04
Loss = 9.0711e-02, PNorm = 84.9344, GNorm = 0.8077, lr_0 = 1.2675e-04
Loss = 7.1545e-02, PNorm = 84.9361, GNorm = 0.5645, lr_0 = 1.2666e-04
Loss = 7.7291e-02, PNorm = 84.9384, GNorm = 0.5353, lr_0 = 1.2658e-04
Loss = 7.9106e-02, PNorm = 84.9401, GNorm = 0.7495, lr_0 = 1.2649e-04
Loss = 7.1259e-02, PNorm = 84.9423, GNorm = 0.8217, lr_0 = 1.2640e-04
Loss = 8.1655e-02, PNorm = 84.9444, GNorm = 1.0760, lr_0 = 1.2632e-04
Loss = 7.2150e-02, PNorm = 84.9462, GNorm = 0.5348, lr_0 = 1.2623e-04
Loss = 8.1505e-02, PNorm = 84.9495, GNorm = 1.0214, lr_0 = 1.2614e-04
Loss = 6.9869e-02, PNorm = 84.9526, GNorm = 0.5031, lr_0 = 1.2606e-04
Loss = 6.6298e-02, PNorm = 84.9543, GNorm = 0.4833, lr_0 = 1.2597e-04
Loss = 7.9205e-02, PNorm = 84.9562, GNorm = 0.6489, lr_0 = 1.2588e-04
Loss = 7.8863e-02, PNorm = 84.9584, GNorm = 0.6403, lr_0 = 1.2580e-04
Loss = 7.4669e-02, PNorm = 84.9599, GNorm = 0.6323, lr_0 = 1.2571e-04
Loss = 8.6715e-02, PNorm = 84.9605, GNorm = 0.6786, lr_0 = 1.2563e-04
Loss = 7.0204e-02, PNorm = 84.9615, GNorm = 0.6498, lr_0 = 1.2554e-04
Loss = 6.7746e-02, PNorm = 84.9634, GNorm = 0.4857, lr_0 = 1.2545e-04
Loss = 7.0881e-02, PNorm = 84.9647, GNorm = 0.5331, lr_0 = 1.2537e-04
Loss = 7.0962e-02, PNorm = 84.9651, GNorm = 0.4597, lr_0 = 1.2528e-04
Loss = 7.1036e-02, PNorm = 84.9667, GNorm = 0.8070, lr_0 = 1.2520e-04
Loss = 8.3315e-02, PNorm = 84.9687, GNorm = 0.6552, lr_0 = 1.2511e-04
Loss = 7.9473e-02, PNorm = 84.9717, GNorm = 0.5149, lr_0 = 1.2502e-04
Loss = 7.9434e-02, PNorm = 84.9749, GNorm = 0.7450, lr_0 = 1.2494e-04
Loss = 7.8847e-02, PNorm = 84.9779, GNorm = 0.5682, lr_0 = 1.2485e-04
Loss = 7.8401e-02, PNorm = 84.9805, GNorm = 0.5326, lr_0 = 1.2477e-04
Loss = 7.3706e-02, PNorm = 84.9830, GNorm = 0.5807, lr_0 = 1.2468e-04
Loss = 7.2741e-02, PNorm = 84.9854, GNorm = 0.6286, lr_0 = 1.2460e-04
Loss = 7.5362e-02, PNorm = 84.9883, GNorm = 0.7933, lr_0 = 1.2451e-04
Loss = 8.4814e-02, PNorm = 84.9890, GNorm = 0.8047, lr_0 = 1.2443e-04
Loss = 7.8255e-02, PNorm = 84.9903, GNorm = 0.5906, lr_0 = 1.2434e-04
Loss = 8.0094e-02, PNorm = 84.9908, GNorm = 0.6718, lr_0 = 1.2426e-04
Loss = 6.6134e-02, PNorm = 84.9938, GNorm = 0.6101, lr_0 = 1.2417e-04
Loss = 8.1684e-02, PNorm = 84.9955, GNorm = 0.5773, lr_0 = 1.2409e-04
Loss = 6.8873e-02, PNorm = 84.9974, GNorm = 0.5423, lr_0 = 1.2400e-04
Loss = 8.0526e-02, PNorm = 85.0006, GNorm = 0.4949, lr_0 = 1.2392e-04
Loss = 8.2143e-02, PNorm = 85.0042, GNorm = 0.7412, lr_0 = 1.2383e-04
Loss = 7.9720e-02, PNorm = 85.0044, GNorm = 0.8034, lr_0 = 1.2375e-04
Loss = 7.5905e-02, PNorm = 85.0045, GNorm = 0.5377, lr_0 = 1.2366e-04
Loss = 7.9390e-02, PNorm = 85.0063, GNorm = 0.5048, lr_0 = 1.2358e-04
Loss = 7.4084e-02, PNorm = 85.0091, GNorm = 0.6744, lr_0 = 1.2349e-04
Loss = 8.1243e-02, PNorm = 85.0107, GNorm = 0.6389, lr_0 = 1.2341e-04
Loss = 8.0284e-02, PNorm = 85.0124, GNorm = 0.5769, lr_0 = 1.2332e-04
Loss = 7.7267e-02, PNorm = 85.0138, GNorm = 0.4908, lr_0 = 1.2324e-04
Loss = 8.0186e-02, PNorm = 85.0147, GNorm = 0.6449, lr_0 = 1.2315e-04
Loss = 7.9212e-02, PNorm = 85.0170, GNorm = 0.6916, lr_0 = 1.2307e-04
Loss = 8.4478e-02, PNorm = 85.0193, GNorm = 0.6789, lr_0 = 1.2298e-04
Loss = 7.9262e-02, PNorm = 85.0206, GNorm = 0.6997, lr_0 = 1.2290e-04
Loss = 8.2330e-02, PNorm = 85.0225, GNorm = 0.5936, lr_0 = 1.2282e-04
Loss = 7.9132e-02, PNorm = 85.0243, GNorm = 0.5117, lr_0 = 1.2273e-04
Loss = 8.1510e-02, PNorm = 85.0258, GNorm = 0.5451, lr_0 = 1.2265e-04
Loss = 8.5454e-02, PNorm = 85.0273, GNorm = 0.9345, lr_0 = 1.2256e-04
Loss = 7.4325e-02, PNorm = 85.0287, GNorm = 0.8329, lr_0 = 1.2248e-04
Loss = 7.3087e-02, PNorm = 85.0288, GNorm = 0.8939, lr_0 = 1.2240e-04
Loss = 7.0925e-02, PNorm = 85.0299, GNorm = 0.5712, lr_0 = 1.2231e-04
Loss = 8.5669e-02, PNorm = 85.0324, GNorm = 0.7548, lr_0 = 1.2223e-04
Loss = 7.8510e-02, PNorm = 85.0346, GNorm = 0.5503, lr_0 = 1.2214e-04
Loss = 7.8911e-02, PNorm = 85.0362, GNorm = 0.7945, lr_0 = 1.2206e-04
Loss = 8.1647e-02, PNorm = 85.0375, GNorm = 0.5264, lr_0 = 1.2198e-04
Loss = 7.0534e-02, PNorm = 85.0394, GNorm = 0.5575, lr_0 = 1.2189e-04
Loss = 7.2723e-02, PNorm = 85.0416, GNorm = 0.4581, lr_0 = 1.2181e-04
Loss = 7.5846e-02, PNorm = 85.0435, GNorm = 0.7157, lr_0 = 1.2173e-04
Loss = 7.6432e-02, PNorm = 85.0453, GNorm = 0.8271, lr_0 = 1.2164e-04
Loss = 8.2506e-02, PNorm = 85.0471, GNorm = 0.6749, lr_0 = 1.2156e-04
Loss = 8.0760e-02, PNorm = 85.0502, GNorm = 0.6331, lr_0 = 1.2148e-04
Loss = 7.5817e-02, PNorm = 85.0529, GNorm = 0.7170, lr_0 = 1.2139e-04
Loss = 8.1008e-02, PNorm = 85.0524, GNorm = 0.7995, lr_0 = 1.2131e-04
Loss = 8.0010e-02, PNorm = 85.0530, GNorm = 0.7004, lr_0 = 1.2123e-04
Loss = 7.5393e-02, PNorm = 85.0544, GNorm = 0.4524, lr_0 = 1.2114e-04
Loss = 8.2066e-02, PNorm = 85.0565, GNorm = 0.7276, lr_0 = 1.2106e-04
Loss = 8.6317e-02, PNorm = 85.0598, GNorm = 0.5423, lr_0 = 1.2098e-04
Loss = 8.1076e-02, PNorm = 85.0628, GNorm = 0.6482, lr_0 = 1.2090e-04
Loss = 8.2253e-02, PNorm = 85.0646, GNorm = 0.5260, lr_0 = 1.2081e-04
Loss = 7.7668e-02, PNorm = 85.0664, GNorm = 0.6115, lr_0 = 1.2073e-04
Loss = 7.2407e-02, PNorm = 85.0695, GNorm = 0.4797, lr_0 = 1.2065e-04
Loss = 8.4177e-02, PNorm = 85.0705, GNorm = 0.5139, lr_0 = 1.2056e-04
Loss = 7.7385e-02, PNorm = 85.0700, GNorm = 0.6959, lr_0 = 1.2048e-04
Loss = 7.3232e-02, PNorm = 85.0708, GNorm = 0.5700, lr_0 = 1.2040e-04
Loss = 7.2091e-02, PNorm = 85.0718, GNorm = 0.5418, lr_0 = 1.2032e-04
Loss = 7.1696e-02, PNorm = 85.0731, GNorm = 0.5116, lr_0 = 1.2023e-04
Loss = 6.7611e-02, PNorm = 85.0747, GNorm = 0.7877, lr_0 = 1.2015e-04
Loss = 7.0099e-02, PNorm = 85.0750, GNorm = 0.5027, lr_0 = 1.2007e-04
Loss = 7.9939e-02, PNorm = 85.0735, GNorm = 0.7134, lr_0 = 1.1999e-04
Loss = 7.7940e-02, PNorm = 85.0746, GNorm = 0.7818, lr_0 = 1.1991e-04
Loss = 7.0559e-02, PNorm = 85.0769, GNorm = 0.5775, lr_0 = 1.1982e-04
Loss = 7.0033e-02, PNorm = 85.0779, GNorm = 0.5803, lr_0 = 1.1974e-04
Loss = 8.7174e-02, PNorm = 85.0796, GNorm = 0.6663, lr_0 = 1.1966e-04
Loss = 8.1679e-02, PNorm = 85.0809, GNorm = 0.6552, lr_0 = 1.1958e-04
Loss = 7.4122e-02, PNorm = 85.0819, GNorm = 0.6980, lr_0 = 1.1950e-04
Loss = 8.0217e-02, PNorm = 85.0827, GNorm = 0.5673, lr_0 = 1.1941e-04
Loss = 8.5373e-02, PNorm = 85.0841, GNorm = 0.8918, lr_0 = 1.1933e-04
Loss = 7.7567e-02, PNorm = 85.0849, GNorm = 0.8301, lr_0 = 1.1925e-04
Loss = 7.1962e-02, PNorm = 85.0869, GNorm = 0.4880, lr_0 = 1.1917e-04
Loss = 6.9555e-02, PNorm = 85.0890, GNorm = 0.6919, lr_0 = 1.1909e-04
Loss = 8.6668e-02, PNorm = 85.0903, GNorm = 0.8261, lr_0 = 1.1901e-04
Loss = 8.6120e-02, PNorm = 85.0937, GNorm = 0.6219, lr_0 = 1.1892e-04
Loss = 8.3479e-02, PNorm = 85.0956, GNorm = 0.7895, lr_0 = 1.1884e-04
Loss = 8.3199e-02, PNorm = 85.0964, GNorm = 0.6757, lr_0 = 1.1876e-04
Loss = 9.2122e-02, PNorm = 85.0985, GNorm = 1.0209, lr_0 = 1.1868e-04
Loss = 7.5705e-02, PNorm = 85.1009, GNorm = 0.6341, lr_0 = 1.1860e-04
Loss = 7.7717e-02, PNorm = 85.1014, GNorm = 0.6242, lr_0 = 1.1852e-04
Loss = 7.2624e-02, PNorm = 85.1018, GNorm = 0.8518, lr_0 = 1.1844e-04
Loss = 7.8710e-02, PNorm = 85.1030, GNorm = 0.5478, lr_0 = 1.1835e-04
Loss = 6.9324e-02, PNorm = 85.1029, GNorm = 0.6222, lr_0 = 1.1827e-04
Loss = 7.1824e-02, PNorm = 85.1037, GNorm = 0.6202, lr_0 = 1.1819e-04
Loss = 7.7564e-02, PNorm = 85.1061, GNorm = 0.6870, lr_0 = 1.1811e-04
Loss = 6.9861e-02, PNorm = 85.1092, GNorm = 0.7235, lr_0 = 1.1803e-04
Loss = 7.5003e-02, PNorm = 85.1123, GNorm = 0.4973, lr_0 = 1.1795e-04
Loss = 7.3552e-02, PNorm = 85.1145, GNorm = 0.7249, lr_0 = 1.1787e-04
Validation mae = 0.227777
Epoch 28
Loss = 7.0600e-02, PNorm = 85.1163, GNorm = 0.6116, lr_0 = 1.1779e-04
Loss = 8.0524e-02, PNorm = 85.1177, GNorm = 0.5826, lr_0 = 1.1771e-04
Loss = 7.1390e-02, PNorm = 85.1195, GNorm = 0.5089, lr_0 = 1.1763e-04
Loss = 7.5450e-02, PNorm = 85.1210, GNorm = 0.5566, lr_0 = 1.1755e-04
Loss = 7.8228e-02, PNorm = 85.1231, GNorm = 0.7528, lr_0 = 1.1747e-04
Loss = 7.0592e-02, PNorm = 85.1252, GNorm = 0.7300, lr_0 = 1.1739e-04
Loss = 6.8420e-02, PNorm = 85.1261, GNorm = 0.5976, lr_0 = 1.1730e-04
Loss = 7.5121e-02, PNorm = 85.1271, GNorm = 0.4787, lr_0 = 1.1722e-04
Loss = 7.3529e-02, PNorm = 85.1286, GNorm = 0.6720, lr_0 = 1.1714e-04
Loss = 7.1071e-02, PNorm = 85.1308, GNorm = 0.5485, lr_0 = 1.1706e-04
Loss = 6.8102e-02, PNorm = 85.1320, GNorm = 0.4850, lr_0 = 1.1698e-04
Loss = 7.2002e-02, PNorm = 85.1348, GNorm = 0.6422, lr_0 = 1.1690e-04
Loss = 8.1794e-02, PNorm = 85.1348, GNorm = 0.6883, lr_0 = 1.1682e-04
Loss = 7.5978e-02, PNorm = 85.1355, GNorm = 0.5642, lr_0 = 1.1674e-04
Loss = 8.5384e-02, PNorm = 85.1388, GNorm = 0.6563, lr_0 = 1.1666e-04
Loss = 7.4058e-02, PNorm = 85.1409, GNorm = 0.5457, lr_0 = 1.1658e-04
Loss = 6.1709e-02, PNorm = 85.1430, GNorm = 0.5589, lr_0 = 1.1650e-04
Loss = 7.0731e-02, PNorm = 85.1458, GNorm = 0.5046, lr_0 = 1.1642e-04
Loss = 7.0130e-02, PNorm = 85.1473, GNorm = 0.4709, lr_0 = 1.1634e-04
Loss = 7.2525e-02, PNorm = 85.1479, GNorm = 0.5577, lr_0 = 1.1626e-04
Loss = 7.7217e-02, PNorm = 85.1498, GNorm = 0.6288, lr_0 = 1.1618e-04
Loss = 7.3404e-02, PNorm = 85.1515, GNorm = 0.7260, lr_0 = 1.1611e-04
Loss = 8.1496e-02, PNorm = 85.1523, GNorm = 0.6881, lr_0 = 1.1603e-04
Loss = 8.0990e-02, PNorm = 85.1522, GNorm = 0.6266, lr_0 = 1.1595e-04
Loss = 7.4550e-02, PNorm = 85.1551, GNorm = 1.1257, lr_0 = 1.1587e-04
Loss = 6.6782e-02, PNorm = 85.1562, GNorm = 0.8088, lr_0 = 1.1579e-04
Loss = 7.0762e-02, PNorm = 85.1582, GNorm = 0.6034, lr_0 = 1.1571e-04
Loss = 7.8583e-02, PNorm = 85.1607, GNorm = 0.6073, lr_0 = 1.1563e-04
Loss = 7.4275e-02, PNorm = 85.1634, GNorm = 0.7629, lr_0 = 1.1555e-04
Loss = 8.3223e-02, PNorm = 85.1648, GNorm = 0.8766, lr_0 = 1.1547e-04
Loss = 7.1143e-02, PNorm = 85.1653, GNorm = 0.5735, lr_0 = 1.1539e-04
Loss = 7.5650e-02, PNorm = 85.1653, GNorm = 0.4485, lr_0 = 1.1531e-04
Loss = 6.8996e-02, PNorm = 85.1664, GNorm = 0.5290, lr_0 = 1.1523e-04
Loss = 7.8669e-02, PNorm = 85.1686, GNorm = 0.4790, lr_0 = 1.1515e-04
Loss = 6.9068e-02, PNorm = 85.1712, GNorm = 0.6767, lr_0 = 1.1508e-04
Loss = 7.2573e-02, PNorm = 85.1742, GNorm = 0.6104, lr_0 = 1.1500e-04
Loss = 7.3075e-02, PNorm = 85.1751, GNorm = 0.5367, lr_0 = 1.1492e-04
Loss = 7.4968e-02, PNorm = 85.1753, GNorm = 0.6885, lr_0 = 1.1484e-04
Loss = 7.6285e-02, PNorm = 85.1773, GNorm = 0.7856, lr_0 = 1.1476e-04
Loss = 7.6045e-02, PNorm = 85.1810, GNorm = 0.5964, lr_0 = 1.1468e-04
Loss = 8.1346e-02, PNorm = 85.1830, GNorm = 0.5799, lr_0 = 1.1460e-04
Loss = 7.2100e-02, PNorm = 85.1836, GNorm = 0.7094, lr_0 = 1.1452e-04
Loss = 7.9220e-02, PNorm = 85.1848, GNorm = 0.6055, lr_0 = 1.1445e-04
Loss = 7.5316e-02, PNorm = 85.1868, GNorm = 0.6494, lr_0 = 1.1437e-04
Loss = 7.4589e-02, PNorm = 85.1886, GNorm = 0.5783, lr_0 = 1.1429e-04
Loss = 7.3533e-02, PNorm = 85.1901, GNorm = 0.7928, lr_0 = 1.1421e-04
Loss = 8.0072e-02, PNorm = 85.1923, GNorm = 0.6317, lr_0 = 1.1413e-04
Loss = 6.6248e-02, PNorm = 85.1940, GNorm = 0.6452, lr_0 = 1.1405e-04
Loss = 8.4557e-02, PNorm = 85.1951, GNorm = 0.6009, lr_0 = 1.1398e-04
Loss = 6.4157e-02, PNorm = 85.1976, GNorm = 0.5704, lr_0 = 1.1390e-04
Loss = 8.4318e-02, PNorm = 85.2003, GNorm = 0.6647, lr_0 = 1.1382e-04
Loss = 7.1740e-02, PNorm = 85.2019, GNorm = 0.6424, lr_0 = 1.1374e-04
Loss = 8.5530e-02, PNorm = 85.2019, GNorm = 0.8068, lr_0 = 1.1366e-04
Loss = 8.5788e-02, PNorm = 85.2017, GNorm = 0.6901, lr_0 = 1.1359e-04
Loss = 8.1467e-02, PNorm = 85.2041, GNorm = 0.7198, lr_0 = 1.1351e-04
Loss = 8.2226e-02, PNorm = 85.2073, GNorm = 0.5661, lr_0 = 1.1343e-04
Loss = 7.6228e-02, PNorm = 85.2100, GNorm = 0.7729, lr_0 = 1.1335e-04
Loss = 9.2511e-02, PNorm = 85.2120, GNorm = 0.8380, lr_0 = 1.1328e-04
Loss = 7.5194e-02, PNorm = 85.2132, GNorm = 0.7446, lr_0 = 1.1320e-04
Loss = 8.4094e-02, PNorm = 85.2153, GNorm = 0.7428, lr_0 = 1.1312e-04
Loss = 6.8876e-02, PNorm = 85.2174, GNorm = 0.7039, lr_0 = 1.1304e-04
Loss = 7.9418e-02, PNorm = 85.2199, GNorm = 0.6541, lr_0 = 1.1297e-04
Loss = 7.0802e-02, PNorm = 85.2218, GNorm = 0.5645, lr_0 = 1.1289e-04
Loss = 7.9306e-02, PNorm = 85.2236, GNorm = 0.5123, lr_0 = 1.1281e-04
Loss = 7.9036e-02, PNorm = 85.2237, GNorm = 0.6436, lr_0 = 1.1273e-04
Loss = 7.9995e-02, PNorm = 85.2265, GNorm = 0.5449, lr_0 = 1.1266e-04
Loss = 7.7707e-02, PNorm = 85.2294, GNorm = 0.7097, lr_0 = 1.1258e-04
Loss = 7.3922e-02, PNorm = 85.2331, GNorm = 0.5530, lr_0 = 1.1250e-04
Loss = 8.3917e-02, PNorm = 85.2347, GNorm = 0.7201, lr_0 = 1.1243e-04
Loss = 6.7770e-02, PNorm = 85.2373, GNorm = 0.5186, lr_0 = 1.1235e-04
Loss = 6.8954e-02, PNorm = 85.2392, GNorm = 0.7930, lr_0 = 1.1227e-04
Loss = 7.5702e-02, PNorm = 85.2410, GNorm = 0.6308, lr_0 = 1.1219e-04
Loss = 7.8780e-02, PNorm = 85.2428, GNorm = 0.5411, lr_0 = 1.1212e-04
Loss = 7.4162e-02, PNorm = 85.2427, GNorm = 0.6346, lr_0 = 1.1204e-04
Loss = 7.7688e-02, PNorm = 85.2447, GNorm = 0.6912, lr_0 = 1.1196e-04
Loss = 8.2042e-02, PNorm = 85.2458, GNorm = 0.6115, lr_0 = 1.1189e-04
Loss = 7.9056e-02, PNorm = 85.2470, GNorm = 0.8210, lr_0 = 1.1181e-04
Loss = 7.5474e-02, PNorm = 85.2490, GNorm = 0.6709, lr_0 = 1.1173e-04
Loss = 7.7572e-02, PNorm = 85.2512, GNorm = 0.5437, lr_0 = 1.1166e-04
Loss = 8.3750e-02, PNorm = 85.2528, GNorm = 0.6309, lr_0 = 1.1158e-04
Loss = 7.2580e-02, PNorm = 85.2540, GNorm = 0.6305, lr_0 = 1.1150e-04
Loss = 6.8703e-02, PNorm = 85.2546, GNorm = 0.6162, lr_0 = 1.1143e-04
Loss = 7.9136e-02, PNorm = 85.2546, GNorm = 0.6368, lr_0 = 1.1135e-04
Loss = 6.6651e-02, PNorm = 85.2546, GNorm = 0.6065, lr_0 = 1.1128e-04
Loss = 7.3664e-02, PNorm = 85.2552, GNorm = 0.4816, lr_0 = 1.1120e-04
Loss = 8.0979e-02, PNorm = 85.2555, GNorm = 0.6112, lr_0 = 1.1112e-04
Loss = 8.6252e-02, PNorm = 85.2568, GNorm = 0.5756, lr_0 = 1.1105e-04
Loss = 8.2144e-02, PNorm = 85.2579, GNorm = 0.8655, lr_0 = 1.1097e-04
Loss = 7.0797e-02, PNorm = 85.2595, GNorm = 0.3917, lr_0 = 1.1089e-04
Loss = 7.6369e-02, PNorm = 85.2616, GNorm = 0.5795, lr_0 = 1.1082e-04
Loss = 7.5143e-02, PNorm = 85.2623, GNorm = 0.6722, lr_0 = 1.1074e-04
Loss = 7.0872e-02, PNorm = 85.2642, GNorm = 0.6729, lr_0 = 1.1067e-04
Loss = 8.7188e-02, PNorm = 85.2654, GNorm = 0.5889, lr_0 = 1.1059e-04
Loss = 7.6887e-02, PNorm = 85.2671, GNorm = 0.7392, lr_0 = 1.1052e-04
Loss = 8.5519e-02, PNorm = 85.2690, GNorm = 0.8454, lr_0 = 1.1044e-04
Loss = 7.3961e-02, PNorm = 85.2702, GNorm = 0.6167, lr_0 = 1.1036e-04
Loss = 7.5682e-02, PNorm = 85.2709, GNorm = 0.5060, lr_0 = 1.1029e-04
Loss = 7.7459e-02, PNorm = 85.2727, GNorm = 0.7374, lr_0 = 1.1021e-04
Loss = 7.7661e-02, PNorm = 85.2740, GNorm = 0.7014, lr_0 = 1.1014e-04
Loss = 8.0776e-02, PNorm = 85.2747, GNorm = 0.8149, lr_0 = 1.1006e-04
Loss = 7.8680e-02, PNorm = 85.2756, GNorm = 0.6100, lr_0 = 1.0999e-04
Loss = 6.5404e-02, PNorm = 85.2777, GNorm = 0.4961, lr_0 = 1.0991e-04
Loss = 7.6524e-02, PNorm = 85.2790, GNorm = 0.5974, lr_0 = 1.0984e-04
Loss = 8.0140e-02, PNorm = 85.2787, GNorm = 0.5148, lr_0 = 1.0976e-04
Loss = 6.9705e-02, PNorm = 85.2796, GNorm = 0.7702, lr_0 = 1.0969e-04
Loss = 7.4159e-02, PNorm = 85.2814, GNorm = 0.5848, lr_0 = 1.0961e-04
Loss = 7.0072e-02, PNorm = 85.2829, GNorm = 0.5872, lr_0 = 1.0954e-04
Loss = 7.0936e-02, PNorm = 85.2835, GNorm = 0.7923, lr_0 = 1.0946e-04
Loss = 6.9509e-02, PNorm = 85.2853, GNorm = 0.5433, lr_0 = 1.0939e-04
Loss = 7.9256e-02, PNorm = 85.2869, GNorm = 0.7326, lr_0 = 1.0931e-04
Loss = 6.6100e-02, PNorm = 85.2884, GNorm = 0.5366, lr_0 = 1.0924e-04
Loss = 7.9873e-02, PNorm = 85.2898, GNorm = 0.6717, lr_0 = 1.0916e-04
Loss = 7.5407e-02, PNorm = 85.2913, GNorm = 0.6528, lr_0 = 1.0909e-04
Loss = 7.7756e-02, PNorm = 85.2936, GNorm = 0.6293, lr_0 = 1.0901e-04
Loss = 7.8401e-02, PNorm = 85.2965, GNorm = 0.7672, lr_0 = 1.0894e-04
Loss = 8.2831e-02, PNorm = 85.2996, GNorm = 0.4749, lr_0 = 1.0886e-04
Loss = 6.7962e-02, PNorm = 85.3015, GNorm = 0.4489, lr_0 = 1.0879e-04
Loss = 7.6498e-02, PNorm = 85.3020, GNorm = 0.6441, lr_0 = 1.0871e-04
Loss = 8.9143e-02, PNorm = 85.3032, GNorm = 0.8894, lr_0 = 1.0864e-04
Loss = 8.4195e-02, PNorm = 85.3030, GNorm = 0.4360, lr_0 = 1.0856e-04
Validation mae = 0.225642
Epoch 29
Loss = 6.5171e-02, PNorm = 85.3039, GNorm = 0.5215, lr_0 = 1.0849e-04
Loss = 6.4091e-02, PNorm = 85.3046, GNorm = 0.5785, lr_0 = 1.0841e-04
Loss = 6.8630e-02, PNorm = 85.3058, GNorm = 0.7709, lr_0 = 1.0834e-04
Loss = 7.7348e-02, PNorm = 85.3075, GNorm = 0.6885, lr_0 = 1.0827e-04
Loss = 7.5011e-02, PNorm = 85.3104, GNorm = 0.9287, lr_0 = 1.0819e-04
Loss = 7.7364e-02, PNorm = 85.3117, GNorm = 0.6883, lr_0 = 1.0812e-04
Loss = 8.2856e-02, PNorm = 85.3128, GNorm = 0.6623, lr_0 = 1.0804e-04
Loss = 7.1401e-02, PNorm = 85.3136, GNorm = 0.5446, lr_0 = 1.0797e-04
Loss = 7.1949e-02, PNorm = 85.3143, GNorm = 0.6801, lr_0 = 1.0790e-04
Loss = 6.6510e-02, PNorm = 85.3173, GNorm = 0.5429, lr_0 = 1.0782e-04
Loss = 6.8358e-02, PNorm = 85.3189, GNorm = 0.6492, lr_0 = 1.0775e-04
Loss = 6.7369e-02, PNorm = 85.3188, GNorm = 0.5129, lr_0 = 1.0767e-04
Loss = 8.2719e-02, PNorm = 85.3201, GNorm = 0.5370, lr_0 = 1.0760e-04
Loss = 7.2171e-02, PNorm = 85.3212, GNorm = 0.6520, lr_0 = 1.0753e-04
Loss = 8.1473e-02, PNorm = 85.3218, GNorm = 0.5158, lr_0 = 1.0745e-04
Loss = 7.4896e-02, PNorm = 85.3233, GNorm = 0.5341, lr_0 = 1.0738e-04
Loss = 7.5658e-02, PNorm = 85.3256, GNorm = 0.6896, lr_0 = 1.0731e-04
Loss = 7.0274e-02, PNorm = 85.3281, GNorm = 0.7306, lr_0 = 1.0723e-04
Loss = 7.7977e-02, PNorm = 85.3298, GNorm = 0.5201, lr_0 = 1.0716e-04
Loss = 7.5467e-02, PNorm = 85.3313, GNorm = 0.5801, lr_0 = 1.0709e-04
Loss = 7.4189e-02, PNorm = 85.3342, GNorm = 0.4710, lr_0 = 1.0701e-04
Loss = 7.2417e-02, PNorm = 85.3371, GNorm = 0.5417, lr_0 = 1.0694e-04
Loss = 7.8811e-02, PNorm = 85.3389, GNorm = 0.5992, lr_0 = 1.0687e-04
Loss = 7.7145e-02, PNorm = 85.3390, GNorm = 0.8385, lr_0 = 1.0679e-04
Loss = 7.2310e-02, PNorm = 85.3401, GNorm = 0.6307, lr_0 = 1.0672e-04
Loss = 7.0253e-02, PNorm = 85.3424, GNorm = 0.4281, lr_0 = 1.0665e-04
Loss = 6.6305e-02, PNorm = 85.3443, GNorm = 0.7148, lr_0 = 1.0657e-04
Loss = 6.4461e-02, PNorm = 85.3459, GNorm = 0.5315, lr_0 = 1.0650e-04
Loss = 6.9941e-02, PNorm = 85.3485, GNorm = 0.6492, lr_0 = 1.0643e-04
Loss = 7.0692e-02, PNorm = 85.3505, GNorm = 0.9110, lr_0 = 1.0635e-04
Loss = 6.5338e-02, PNorm = 85.3514, GNorm = 0.5744, lr_0 = 1.0628e-04
Loss = 6.9525e-02, PNorm = 85.3525, GNorm = 0.5589, lr_0 = 1.0621e-04
Loss = 7.8421e-02, PNorm = 85.3524, GNorm = 0.7789, lr_0 = 1.0614e-04
Loss = 7.1766e-02, PNorm = 85.3533, GNorm = 0.5584, lr_0 = 1.0606e-04
Loss = 6.8241e-02, PNorm = 85.3536, GNorm = 0.7589, lr_0 = 1.0599e-04
Loss = 8.3617e-02, PNorm = 85.3539, GNorm = 0.7079, lr_0 = 1.0592e-04
Loss = 8.0241e-02, PNorm = 85.3554, GNorm = 0.6021, lr_0 = 1.0585e-04
Loss = 7.7355e-02, PNorm = 85.3568, GNorm = 0.7135, lr_0 = 1.0577e-04
Loss = 8.2908e-02, PNorm = 85.3566, GNorm = 0.8525, lr_0 = 1.0570e-04
Loss = 6.8063e-02, PNorm = 85.3579, GNorm = 0.6997, lr_0 = 1.0563e-04
Loss = 7.9726e-02, PNorm = 85.3607, GNorm = 0.6469, lr_0 = 1.0556e-04
Loss = 6.9017e-02, PNorm = 85.3630, GNorm = 0.6603, lr_0 = 1.0548e-04
Loss = 7.0647e-02, PNorm = 85.3650, GNorm = 0.4339, lr_0 = 1.0541e-04
Loss = 7.1063e-02, PNorm = 85.3663, GNorm = 0.5266, lr_0 = 1.0534e-04
Loss = 6.9461e-02, PNorm = 85.3677, GNorm = 0.5865, lr_0 = 1.0527e-04
Loss = 7.9710e-02, PNorm = 85.3686, GNorm = 0.6073, lr_0 = 1.0519e-04
Loss = 7.0971e-02, PNorm = 85.3703, GNorm = 0.6794, lr_0 = 1.0512e-04
Loss = 7.7027e-02, PNorm = 85.3732, GNorm = 0.9190, lr_0 = 1.0505e-04
Loss = 7.5733e-02, PNorm = 85.3755, GNorm = 0.7632, lr_0 = 1.0498e-04
Loss = 8.0115e-02, PNorm = 85.3772, GNorm = 0.7039, lr_0 = 1.0491e-04
Loss = 7.3956e-02, PNorm = 85.3789, GNorm = 0.6964, lr_0 = 1.0483e-04
Loss = 7.7042e-02, PNorm = 85.3802, GNorm = 0.6854, lr_0 = 1.0476e-04
Loss = 8.0211e-02, PNorm = 85.3814, GNorm = 0.7724, lr_0 = 1.0469e-04
Loss = 7.0713e-02, PNorm = 85.3830, GNorm = 0.6117, lr_0 = 1.0462e-04
Loss = 7.0881e-02, PNorm = 85.3830, GNorm = 0.5440, lr_0 = 1.0455e-04
Loss = 8.1234e-02, PNorm = 85.3839, GNorm = 0.5759, lr_0 = 1.0448e-04
Loss = 8.4487e-02, PNorm = 85.3857, GNorm = 0.6269, lr_0 = 1.0440e-04
Loss = 8.3282e-02, PNorm = 85.3861, GNorm = 0.6631, lr_0 = 1.0433e-04
Loss = 7.0709e-02, PNorm = 85.3882, GNorm = 0.5474, lr_0 = 1.0426e-04
Loss = 7.6885e-02, PNorm = 85.3896, GNorm = 1.0132, lr_0 = 1.0419e-04
Loss = 8.1118e-02, PNorm = 85.3898, GNorm = 0.5762, lr_0 = 1.0412e-04
Loss = 6.7257e-02, PNorm = 85.3912, GNorm = 0.4865, lr_0 = 1.0405e-04
Loss = 6.7065e-02, PNorm = 85.3933, GNorm = 0.5401, lr_0 = 1.0398e-04
Loss = 7.0310e-02, PNorm = 85.3946, GNorm = 0.5756, lr_0 = 1.0391e-04
Loss = 7.3699e-02, PNorm = 85.3955, GNorm = 0.5122, lr_0 = 1.0383e-04
Loss = 7.7337e-02, PNorm = 85.3976, GNorm = 0.5761, lr_0 = 1.0376e-04
Loss = 7.3139e-02, PNorm = 85.3988, GNorm = 0.4821, lr_0 = 1.0369e-04
Loss = 6.7958e-02, PNorm = 85.4000, GNorm = 0.5275, lr_0 = 1.0362e-04
Loss = 8.2867e-02, PNorm = 85.4015, GNorm = 0.6907, lr_0 = 1.0355e-04
Loss = 6.5831e-02, PNorm = 85.4030, GNorm = 0.5525, lr_0 = 1.0348e-04
Loss = 8.3214e-02, PNorm = 85.4053, GNorm = 0.7218, lr_0 = 1.0341e-04
Loss = 7.1221e-02, PNorm = 85.4065, GNorm = 0.5689, lr_0 = 1.0334e-04
Loss = 8.3251e-02, PNorm = 85.4083, GNorm = 0.6594, lr_0 = 1.0327e-04
Loss = 7.5528e-02, PNorm = 85.4088, GNorm = 0.7133, lr_0 = 1.0320e-04
Loss = 7.4688e-02, PNorm = 85.4104, GNorm = 0.6340, lr_0 = 1.0312e-04
Loss = 7.4176e-02, PNorm = 85.4121, GNorm = 0.5115, lr_0 = 1.0305e-04
Loss = 7.4455e-02, PNorm = 85.4146, GNorm = 0.7748, lr_0 = 1.0298e-04
Loss = 7.4722e-02, PNorm = 85.4170, GNorm = 0.5347, lr_0 = 1.0291e-04
Loss = 7.4240e-02, PNorm = 85.4183, GNorm = 0.8867, lr_0 = 1.0284e-04
Loss = 8.8346e-02, PNorm = 85.4205, GNorm = 0.8118, lr_0 = 1.0277e-04
Loss = 8.0909e-02, PNorm = 85.4213, GNorm = 0.5994, lr_0 = 1.0270e-04
Loss = 7.3314e-02, PNorm = 85.4217, GNorm = 0.6399, lr_0 = 1.0263e-04
Loss = 6.9094e-02, PNorm = 85.4235, GNorm = 0.6442, lr_0 = 1.0256e-04
Loss = 7.2352e-02, PNorm = 85.4249, GNorm = 1.0974, lr_0 = 1.0249e-04
Loss = 7.0286e-02, PNorm = 85.4266, GNorm = 0.6650, lr_0 = 1.0242e-04
Loss = 7.6636e-02, PNorm = 85.4289, GNorm = 0.5394, lr_0 = 1.0235e-04
Loss = 7.8347e-02, PNorm = 85.4294, GNorm = 0.7463, lr_0 = 1.0228e-04
Loss = 7.6446e-02, PNorm = 85.4287, GNorm = 0.5185, lr_0 = 1.0221e-04
Loss = 8.3648e-02, PNorm = 85.4297, GNorm = 0.5535, lr_0 = 1.0214e-04
Loss = 8.1833e-02, PNorm = 85.4313, GNorm = 0.7901, lr_0 = 1.0207e-04
Loss = 7.5167e-02, PNorm = 85.4322, GNorm = 0.5431, lr_0 = 1.0200e-04
Loss = 8.3140e-02, PNorm = 85.4347, GNorm = 0.5702, lr_0 = 1.0193e-04
Loss = 7.4410e-02, PNorm = 85.4359, GNorm = 0.7963, lr_0 = 1.0186e-04
Loss = 7.3144e-02, PNorm = 85.4370, GNorm = 0.6283, lr_0 = 1.0179e-04
Loss = 7.5240e-02, PNorm = 85.4382, GNorm = 0.6232, lr_0 = 1.0172e-04
Loss = 7.7776e-02, PNorm = 85.4379, GNorm = 0.5399, lr_0 = 1.0165e-04
Loss = 8.2643e-02, PNorm = 85.4392, GNorm = 0.5934, lr_0 = 1.0158e-04
Loss = 8.4900e-02, PNorm = 85.4424, GNorm = 0.7057, lr_0 = 1.0151e-04
Loss = 7.6865e-02, PNorm = 85.4451, GNorm = 0.5013, lr_0 = 1.0144e-04
Loss = 6.4802e-02, PNorm = 85.4460, GNorm = 0.6032, lr_0 = 1.0137e-04
Loss = 8.4312e-02, PNorm = 85.4470, GNorm = 0.6794, lr_0 = 1.0130e-04
Loss = 8.0150e-02, PNorm = 85.4489, GNorm = 0.9100, lr_0 = 1.0123e-04
Loss = 9.4850e-02, PNorm = 85.4505, GNorm = 0.7943, lr_0 = 1.0116e-04
Loss = 7.4316e-02, PNorm = 85.4513, GNorm = 0.6391, lr_0 = 1.0110e-04
Loss = 8.2783e-02, PNorm = 85.4530, GNorm = 0.7544, lr_0 = 1.0103e-04
Loss = 6.6187e-02, PNorm = 85.4541, GNorm = 0.5585, lr_0 = 1.0096e-04
Loss = 8.2725e-02, PNorm = 85.4559, GNorm = 0.6084, lr_0 = 1.0089e-04
Loss = 6.8892e-02, PNorm = 85.4566, GNorm = 0.8137, lr_0 = 1.0082e-04
Loss = 8.1668e-02, PNorm = 85.4580, GNorm = 0.7835, lr_0 = 1.0075e-04
Loss = 7.8431e-02, PNorm = 85.4602, GNorm = 0.5062, lr_0 = 1.0068e-04
Loss = 7.3484e-02, PNorm = 85.4612, GNorm = 0.5777, lr_0 = 1.0061e-04
Loss = 7.8170e-02, PNorm = 85.4622, GNorm = 0.6218, lr_0 = 1.0054e-04
Loss = 8.0183e-02, PNorm = 85.4629, GNorm = 0.6217, lr_0 = 1.0047e-04
Loss = 7.3552e-02, PNorm = 85.4636, GNorm = 0.8085, lr_0 = 1.0041e-04
Loss = 7.3009e-02, PNorm = 85.4633, GNorm = 0.9002, lr_0 = 1.0034e-04
Loss = 6.2525e-02, PNorm = 85.4638, GNorm = 0.4683, lr_0 = 1.0027e-04
Loss = 7.9100e-02, PNorm = 85.4654, GNorm = 0.4961, lr_0 = 1.0020e-04
Loss = 7.2343e-02, PNorm = 85.4687, GNorm = 0.7587, lr_0 = 1.0013e-04
Loss = 7.3829e-02, PNorm = 85.4708, GNorm = 0.5118, lr_0 = 1.0006e-04
Loss = 6.9205e-02, PNorm = 85.4720, GNorm = 0.6472, lr_0 = 1.0000e-04
Validation mae = 0.224761
Model 0 best validation mae = 0.224761 on epoch 29
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.224802
Ensemble test mae = 0.224802
Fold 8
Splitting data with seed 8
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 9.6968e-01, PNorm = 47.8614, GNorm = 2.9695, lr_0 = 1.0413e-04
Loss = 9.9384e-01, PNorm = 47.8621, GNorm = 5.9646, lr_0 = 1.0788e-04
Loss = 1.0425e+00, PNorm = 47.8635, GNorm = 2.4979, lr_0 = 1.1163e-04
Loss = 8.2637e-01, PNorm = 47.8657, GNorm = 8.8481, lr_0 = 1.1537e-04
Loss = 8.9923e-01, PNorm = 47.8686, GNorm = 7.4311, lr_0 = 1.1913e-04
Loss = 7.2827e-01, PNorm = 47.8730, GNorm = 3.4671, lr_0 = 1.2287e-04
Loss = 7.6607e-01, PNorm = 47.8783, GNorm = 7.8313, lr_0 = 1.2663e-04
Loss = 7.8285e-01, PNorm = 47.8839, GNorm = 7.5686, lr_0 = 1.3038e-04
Loss = 8.8949e-01, PNorm = 47.8870, GNorm = 2.7745, lr_0 = 1.3413e-04
Loss = 9.2390e-01, PNorm = 47.8919, GNorm = 9.2984, lr_0 = 1.3788e-04
Loss = 8.0313e-01, PNorm = 47.8983, GNorm = 2.4286, lr_0 = 1.4163e-04
Loss = 6.8132e-01, PNorm = 47.9059, GNorm = 5.3597, lr_0 = 1.4537e-04
Loss = 6.3636e-01, PNorm = 47.9133, GNorm = 2.8963, lr_0 = 1.4913e-04
Loss = 5.6308e-01, PNorm = 47.9192, GNorm = 5.0936, lr_0 = 1.5288e-04
Loss = 4.8511e-01, PNorm = 47.9257, GNorm = 5.9978, lr_0 = 1.5662e-04
Loss = 4.6644e-01, PNorm = 47.9318, GNorm = 6.2579, lr_0 = 1.6038e-04
Loss = 5.2090e-01, PNorm = 47.9351, GNorm = 3.1077, lr_0 = 1.6412e-04
Loss = 5.5802e-01, PNorm = 47.9398, GNorm = 3.1864, lr_0 = 1.6788e-04
Loss = 4.9989e-01, PNorm = 47.9461, GNorm = 5.8249, lr_0 = 1.7163e-04
Loss = 4.9363e-01, PNorm = 47.9532, GNorm = 7.1523, lr_0 = 1.7538e-04
Loss = 4.2380e-01, PNorm = 47.9601, GNorm = 3.8868, lr_0 = 1.7913e-04
Loss = 4.1410e-01, PNorm = 47.9663, GNorm = 3.1247, lr_0 = 1.8288e-04
Loss = 3.9305e-01, PNorm = 47.9729, GNorm = 9.1938, lr_0 = 1.8662e-04
Loss = 4.4517e-01, PNorm = 47.9797, GNorm = 4.6007, lr_0 = 1.9038e-04
Loss = 4.2936e-01, PNorm = 47.9872, GNorm = 7.9778, lr_0 = 1.9413e-04
Loss = 3.6330e-01, PNorm = 47.9932, GNorm = 7.0785, lr_0 = 1.9788e-04
Loss = 3.8332e-01, PNorm = 47.9982, GNorm = 16.7840, lr_0 = 2.0163e-04
Loss = 4.5435e-01, PNorm = 48.0041, GNorm = 2.6717, lr_0 = 2.0537e-04
Loss = 3.7482e-01, PNorm = 48.0114, GNorm = 4.0276, lr_0 = 2.0913e-04
Loss = 4.0011e-01, PNorm = 48.0161, GNorm = 10.1904, lr_0 = 2.1288e-04
Loss = 4.2271e-01, PNorm = 48.0217, GNorm = 3.8906, lr_0 = 2.1663e-04
Loss = 4.6293e-01, PNorm = 48.0266, GNorm = 17.4159, lr_0 = 2.2038e-04
Loss = 3.2079e-01, PNorm = 48.0343, GNorm = 3.3745, lr_0 = 2.2412e-04
Loss = 4.2499e-01, PNorm = 48.0395, GNorm = 12.2459, lr_0 = 2.2787e-04
Loss = 3.9373e-01, PNorm = 48.0458, GNorm = 7.4961, lr_0 = 2.3163e-04
Loss = 4.2274e-01, PNorm = 48.0543, GNorm = 7.3426, lr_0 = 2.3538e-04
Loss = 3.3520e-01, PNorm = 48.0621, GNorm = 1.2755, lr_0 = 2.3913e-04
Loss = 3.2525e-01, PNorm = 48.0664, GNorm = 6.8445, lr_0 = 2.4288e-04
Loss = 3.2974e-01, PNorm = 48.0723, GNorm = 3.6587, lr_0 = 2.4662e-04
Loss = 4.7671e-01, PNorm = 48.0771, GNorm = 17.9303, lr_0 = 2.5038e-04
Loss = 3.9742e-01, PNorm = 48.0810, GNorm = 3.9970, lr_0 = 2.5413e-04
Loss = 4.0230e-01, PNorm = 48.0900, GNorm = 4.0621, lr_0 = 2.5788e-04
Loss = 3.4608e-01, PNorm = 48.0981, GNorm = 2.2428, lr_0 = 2.6163e-04
Loss = 3.4021e-01, PNorm = 48.1030, GNorm = 6.3513, lr_0 = 2.6537e-04
Loss = 3.6247e-01, PNorm = 48.1093, GNorm = 11.2758, lr_0 = 2.6912e-04
Loss = 3.9444e-01, PNorm = 48.1171, GNorm = 11.5786, lr_0 = 2.7288e-04
Loss = 3.0047e-01, PNorm = 48.1276, GNorm = 4.5724, lr_0 = 2.7663e-04
Loss = 3.8020e-01, PNorm = 48.1314, GNorm = 2.2839, lr_0 = 2.8038e-04
Loss = 3.4763e-01, PNorm = 48.1379, GNorm = 1.6973, lr_0 = 2.8413e-04
Loss = 2.9087e-01, PNorm = 48.1446, GNorm = 1.8063, lr_0 = 2.8787e-04
Loss = 2.9945e-01, PNorm = 48.1521, GNorm = 1.3129, lr_0 = 2.9163e-04
Loss = 3.1370e-01, PNorm = 48.1579, GNorm = 1.8906, lr_0 = 2.9538e-04
Loss = 3.0387e-01, PNorm = 48.1642, GNorm = 2.0545, lr_0 = 2.9913e-04
Loss = 3.2252e-01, PNorm = 48.1707, GNorm = 5.8846, lr_0 = 3.0288e-04
Loss = 3.2882e-01, PNorm = 48.1774, GNorm = 8.0750, lr_0 = 3.0662e-04
Loss = 3.2416e-01, PNorm = 48.1831, GNorm = 10.8570, lr_0 = 3.1037e-04
Loss = 3.5902e-01, PNorm = 48.1917, GNorm = 5.9507, lr_0 = 3.1413e-04
Loss = 3.2795e-01, PNorm = 48.1976, GNorm = 1.8465, lr_0 = 3.1788e-04
Loss = 3.2234e-01, PNorm = 48.2065, GNorm = 4.9532, lr_0 = 3.2163e-04
Loss = 3.4062e-01, PNorm = 48.2141, GNorm = 2.6724, lr_0 = 3.2538e-04
Loss = 3.2793e-01, PNorm = 48.2241, GNorm = 12.7332, lr_0 = 3.2912e-04
Loss = 3.1545e-01, PNorm = 48.2311, GNorm = 3.9618, lr_0 = 3.3288e-04
Loss = 2.8494e-01, PNorm = 48.2380, GNorm = 1.3243, lr_0 = 3.3663e-04
Loss = 2.8961e-01, PNorm = 48.2458, GNorm = 9.2393, lr_0 = 3.4038e-04
Loss = 2.8661e-01, PNorm = 48.2532, GNorm = 3.7194, lr_0 = 3.4413e-04
Loss = 3.1969e-01, PNorm = 48.2588, GNorm = 2.8526, lr_0 = 3.4787e-04
Loss = 3.0005e-01, PNorm = 48.2636, GNorm = 7.0123, lr_0 = 3.5162e-04
Loss = 2.9165e-01, PNorm = 48.2716, GNorm = 2.5611, lr_0 = 3.5538e-04
Loss = 2.8321e-01, PNorm = 48.2798, GNorm = 1.2470, lr_0 = 3.5913e-04
Loss = 2.8112e-01, PNorm = 48.2898, GNorm = 9.6389, lr_0 = 3.6288e-04
Loss = 2.8907e-01, PNorm = 48.2962, GNorm = 3.5505, lr_0 = 3.6662e-04
Loss = 2.4991e-01, PNorm = 48.3062, GNorm = 4.2250, lr_0 = 3.7037e-04
Loss = 2.6555e-01, PNorm = 48.3128, GNorm = 2.5074, lr_0 = 3.7413e-04
Loss = 3.1667e-01, PNorm = 48.3160, GNorm = 8.7001, lr_0 = 3.7788e-04
Loss = 4.2018e-01, PNorm = 48.3277, GNorm = 15.9551, lr_0 = 3.8163e-04
Loss = 4.0545e-01, PNorm = 48.3436, GNorm = 1.6613, lr_0 = 3.8537e-04
Loss = 3.0897e-01, PNorm = 48.3597, GNorm = 2.1087, lr_0 = 3.8912e-04
Loss = 2.9601e-01, PNorm = 48.3698, GNorm = 1.6237, lr_0 = 3.9287e-04
Loss = 3.3576e-01, PNorm = 48.3775, GNorm = 3.5582, lr_0 = 3.9663e-04
Loss = 3.4871e-01, PNorm = 48.3847, GNorm = 1.4399, lr_0 = 4.0038e-04
Loss = 3.2612e-01, PNorm = 48.3917, GNorm = 9.4462, lr_0 = 4.0413e-04
Loss = 3.0127e-01, PNorm = 48.4006, GNorm = 2.7689, lr_0 = 4.0787e-04
Loss = 3.2219e-01, PNorm = 48.4120, GNorm = 2.0270, lr_0 = 4.1162e-04
Loss = 2.7503e-01, PNorm = 48.4262, GNorm = 6.0591, lr_0 = 4.1537e-04
Loss = 3.0445e-01, PNorm = 48.4359, GNorm = 6.4084, lr_0 = 4.1913e-04
Loss = 2.8509e-01, PNorm = 48.4446, GNorm = 4.7946, lr_0 = 4.2288e-04
Loss = 2.8818e-01, PNorm = 48.4552, GNorm = 1.4220, lr_0 = 4.2662e-04
Loss = 3.2448e-01, PNorm = 48.4633, GNorm = 11.6298, lr_0 = 4.3037e-04
Loss = 2.8789e-01, PNorm = 48.4725, GNorm = 9.4371, lr_0 = 4.3412e-04
Loss = 2.4824e-01, PNorm = 48.4864, GNorm = 3.2862, lr_0 = 4.3788e-04
Loss = 2.9100e-01, PNorm = 48.5000, GNorm = 2.5779, lr_0 = 4.4163e-04
Loss = 2.6917e-01, PNorm = 48.5156, GNorm = 3.6049, lr_0 = 4.4538e-04
Loss = 2.6865e-01, PNorm = 48.5280, GNorm = 3.5959, lr_0 = 4.4912e-04
Loss = 2.8523e-01, PNorm = 48.5361, GNorm = 1.0830, lr_0 = 4.5287e-04
Loss = 2.8319e-01, PNorm = 48.5438, GNorm = 4.4198, lr_0 = 4.5662e-04
Loss = 2.7566e-01, PNorm = 48.5517, GNorm = 4.1752, lr_0 = 4.6038e-04
Loss = 2.8923e-01, PNorm = 48.5554, GNorm = 4.1567, lr_0 = 4.6413e-04
Loss = 2.7238e-01, PNorm = 48.5629, GNorm = 4.3788, lr_0 = 4.6787e-04
Loss = 2.9016e-01, PNorm = 48.5761, GNorm = 3.2845, lr_0 = 4.7162e-04
Loss = 2.5200e-01, PNorm = 48.5867, GNorm = 3.4929, lr_0 = 4.7537e-04
Loss = 2.9715e-01, PNorm = 48.5975, GNorm = 6.0085, lr_0 = 4.7913e-04
Loss = 3.3144e-01, PNorm = 48.6034, GNorm = 1.1306, lr_0 = 4.8288e-04
Loss = 2.6940e-01, PNorm = 48.6140, GNorm = 1.5880, lr_0 = 4.8663e-04
Loss = 2.4599e-01, PNorm = 48.6274, GNorm = 1.4840, lr_0 = 4.9038e-04
Loss = 2.6885e-01, PNorm = 48.6349, GNorm = 1.6701, lr_0 = 4.9412e-04
Loss = 3.2967e-01, PNorm = 48.6460, GNorm = 1.6387, lr_0 = 4.9788e-04
Loss = 2.8080e-01, PNorm = 48.6582, GNorm = 2.1849, lr_0 = 5.0163e-04
Loss = 2.8542e-01, PNorm = 48.6703, GNorm = 3.1254, lr_0 = 5.0538e-04
Loss = 3.1584e-01, PNorm = 48.6820, GNorm = 2.3826, lr_0 = 5.0913e-04
Loss = 2.7844e-01, PNorm = 48.6917, GNorm = 2.0448, lr_0 = 5.1287e-04
Loss = 2.7974e-01, PNorm = 48.6987, GNorm = 5.3699, lr_0 = 5.1663e-04
Loss = 2.6411e-01, PNorm = 48.7099, GNorm = 3.5099, lr_0 = 5.2038e-04
Loss = 2.7444e-01, PNorm = 48.7192, GNorm = 1.5551, lr_0 = 5.2413e-04
Loss = 2.5208e-01, PNorm = 48.7291, GNorm = 2.4148, lr_0 = 5.2788e-04
Loss = 2.9454e-01, PNorm = 48.7351, GNorm = 2.2871, lr_0 = 5.3162e-04
Loss = 2.8255e-01, PNorm = 48.7463, GNorm = 1.8698, lr_0 = 5.3538e-04
Loss = 2.7790e-01, PNorm = 48.7585, GNorm = 3.3744, lr_0 = 5.3912e-04
Loss = 2.8138e-01, PNorm = 48.7704, GNorm = 5.0320, lr_0 = 5.4288e-04
Loss = 2.7935e-01, PNorm = 48.7861, GNorm = 1.2020, lr_0 = 5.4663e-04
Loss = 2.8510e-01, PNorm = 48.7991, GNorm = 3.5262, lr_0 = 5.5038e-04
Validation mae = 0.339833
Epoch 1
Loss = 2.4995e-01, PNorm = 48.8107, GNorm = 3.7526, lr_0 = 5.5413e-04
Loss = 2.5065e-01, PNorm = 48.8243, GNorm = 1.9284, lr_0 = 5.5787e-04
Loss = 2.7014e-01, PNorm = 48.8325, GNorm = 2.2146, lr_0 = 5.6163e-04
Loss = 2.9831e-01, PNorm = 48.8435, GNorm = 1.8507, lr_0 = 5.6538e-04
Loss = 2.6039e-01, PNorm = 48.8571, GNorm = 4.1416, lr_0 = 5.6913e-04
Loss = 2.8106e-01, PNorm = 48.8623, GNorm = 5.3945, lr_0 = 5.7288e-04
Loss = 2.6238e-01, PNorm = 48.8761, GNorm = 1.4002, lr_0 = 5.7662e-04
Loss = 2.6335e-01, PNorm = 48.8913, GNorm = 3.7205, lr_0 = 5.8038e-04
Loss = 2.6992e-01, PNorm = 48.9033, GNorm = 2.7766, lr_0 = 5.8413e-04
Loss = 2.5987e-01, PNorm = 48.9194, GNorm = 4.2321, lr_0 = 5.8788e-04
Loss = 2.4965e-01, PNorm = 48.9325, GNorm = 4.2688, lr_0 = 5.9163e-04
Loss = 2.8165e-01, PNorm = 48.9474, GNorm = 7.1401, lr_0 = 5.9538e-04
Loss = 2.9714e-01, PNorm = 48.9586, GNorm = 3.6075, lr_0 = 5.9913e-04
Loss = 3.0123e-01, PNorm = 48.9784, GNorm = 1.5956, lr_0 = 6.0288e-04
Loss = 2.9171e-01, PNorm = 48.9943, GNorm = 1.2487, lr_0 = 6.0663e-04
Loss = 2.5322e-01, PNorm = 49.0099, GNorm = 2.4479, lr_0 = 6.1038e-04
Loss = 2.5705e-01, PNorm = 49.0258, GNorm = 5.2977, lr_0 = 6.1413e-04
Loss = 3.3521e-01, PNorm = 49.0448, GNorm = 2.5418, lr_0 = 6.1788e-04
Loss = 3.4482e-01, PNorm = 49.0641, GNorm = 3.2162, lr_0 = 6.2163e-04
Loss = 2.9691e-01, PNorm = 49.0845, GNorm = 1.1159, lr_0 = 6.2538e-04
Loss = 2.5724e-01, PNorm = 49.0997, GNorm = 1.1748, lr_0 = 6.2913e-04
Loss = 2.8248e-01, PNorm = 49.1109, GNorm = 1.6937, lr_0 = 6.3288e-04
Loss = 2.6398e-01, PNorm = 49.1230, GNorm = 1.6900, lr_0 = 6.3663e-04
Loss = 2.8747e-01, PNorm = 49.1370, GNorm = 2.4205, lr_0 = 6.4038e-04
Loss = 2.6870e-01, PNorm = 49.1513, GNorm = 3.5082, lr_0 = 6.4413e-04
Loss = 2.5298e-01, PNorm = 49.1741, GNorm = 2.3499, lr_0 = 6.4788e-04
Loss = 2.4600e-01, PNorm = 49.1815, GNorm = 0.9779, lr_0 = 6.5163e-04
Loss = 2.5910e-01, PNorm = 49.1883, GNorm = 1.3684, lr_0 = 6.5538e-04
Loss = 2.5544e-01, PNorm = 49.2030, GNorm = 3.9942, lr_0 = 6.5913e-04
Loss = 3.4094e-01, PNorm = 49.2173, GNorm = 1.6284, lr_0 = 6.6288e-04
Loss = 3.1896e-01, PNorm = 49.2388, GNorm = 4.8720, lr_0 = 6.6663e-04
Loss = 3.3850e-01, PNorm = 49.2639, GNorm = 3.5979, lr_0 = 6.7038e-04
Loss = 2.6605e-01, PNorm = 49.2834, GNorm = 0.8248, lr_0 = 6.7413e-04
Loss = 2.8288e-01, PNorm = 49.2894, GNorm = 2.4624, lr_0 = 6.7788e-04
Loss = 2.5762e-01, PNorm = 49.3084, GNorm = 2.4791, lr_0 = 6.8163e-04
Loss = 2.9660e-01, PNorm = 49.3122, GNorm = 4.7539, lr_0 = 6.8538e-04
Loss = 2.9123e-01, PNorm = 49.3282, GNorm = 4.7027, lr_0 = 6.8913e-04
Loss = 2.9642e-01, PNorm = 49.3397, GNorm = 1.4883, lr_0 = 6.9288e-04
Loss = 3.0358e-01, PNorm = 49.3591, GNorm = 6.8040, lr_0 = 6.9663e-04
Loss = 2.5810e-01, PNorm = 49.3771, GNorm = 3.3220, lr_0 = 7.0038e-04
Loss = 2.5975e-01, PNorm = 49.3920, GNorm = 2.5143, lr_0 = 7.0413e-04
Loss = 2.6725e-01, PNorm = 49.4018, GNorm = 1.5614, lr_0 = 7.0788e-04
Loss = 2.7131e-01, PNorm = 49.4157, GNorm = 1.7844, lr_0 = 7.1163e-04
Loss = 2.9875e-01, PNorm = 49.4312, GNorm = 4.7616, lr_0 = 7.1538e-04
Loss = 3.0149e-01, PNorm = 49.4499, GNorm = 4.3943, lr_0 = 7.1913e-04
Loss = 3.4546e-01, PNorm = 49.4686, GNorm = 4.5002, lr_0 = 7.2288e-04
Loss = 3.5935e-01, PNorm = 49.4952, GNorm = 5.8787, lr_0 = 7.2663e-04
Loss = 3.5550e-01, PNorm = 49.5274, GNorm = 3.6525, lr_0 = 7.3038e-04
Loss = 3.2117e-01, PNorm = 49.5553, GNorm = 1.2773, lr_0 = 7.3413e-04
Loss = 2.6431e-01, PNorm = 49.5775, GNorm = 0.7850, lr_0 = 7.3788e-04
Loss = 2.3808e-01, PNorm = 49.5988, GNorm = 4.5458, lr_0 = 7.4163e-04
Loss = 2.5301e-01, PNorm = 49.6118, GNorm = 2.1806, lr_0 = 7.4538e-04
Loss = 2.4810e-01, PNorm = 49.6262, GNorm = 0.8261, lr_0 = 7.4913e-04
Loss = 2.6319e-01, PNorm = 49.6438, GNorm = 1.0214, lr_0 = 7.5288e-04
Loss = 2.4949e-01, PNorm = 49.6602, GNorm = 1.3765, lr_0 = 7.5663e-04
Loss = 2.6351e-01, PNorm = 49.6763, GNorm = 2.0910, lr_0 = 7.6038e-04
Loss = 2.5273e-01, PNorm = 49.6882, GNorm = 0.9036, lr_0 = 7.6413e-04
Loss = 2.4213e-01, PNorm = 49.7060, GNorm = 1.0853, lr_0 = 7.6788e-04
Loss = 3.2032e-01, PNorm = 49.7223, GNorm = 1.0908, lr_0 = 7.7163e-04
Loss = 3.0098e-01, PNorm = 49.7389, GNorm = 2.5474, lr_0 = 7.7538e-04
Loss = 2.6222e-01, PNorm = 49.7539, GNorm = 1.0357, lr_0 = 7.7913e-04
Loss = 2.6013e-01, PNorm = 49.7708, GNorm = 3.5206, lr_0 = 7.8288e-04
Loss = 2.5468e-01, PNorm = 49.7847, GNorm = 0.9786, lr_0 = 7.8663e-04
Loss = 2.4982e-01, PNorm = 49.8056, GNorm = 1.1702, lr_0 = 7.9038e-04
Loss = 2.7923e-01, PNorm = 49.8227, GNorm = 4.9122, lr_0 = 7.9413e-04
Loss = 3.5013e-01, PNorm = 49.8471, GNorm = 2.3043, lr_0 = 7.9788e-04
Loss = 2.7544e-01, PNorm = 49.8826, GNorm = 0.9815, lr_0 = 8.0163e-04
Loss = 3.0095e-01, PNorm = 49.9044, GNorm = 3.4424, lr_0 = 8.0538e-04
Loss = 2.6816e-01, PNorm = 49.9230, GNorm = 1.0273, lr_0 = 8.0913e-04
Loss = 2.9460e-01, PNorm = 49.9332, GNorm = 3.7626, lr_0 = 8.1288e-04
Loss = 2.5137e-01, PNorm = 49.9557, GNorm = 0.8489, lr_0 = 8.1663e-04
Loss = 2.4115e-01, PNorm = 49.9755, GNorm = 2.8570, lr_0 = 8.2038e-04
Loss = 3.1173e-01, PNorm = 49.9941, GNorm = 1.1411, lr_0 = 8.2413e-04
Loss = 2.5897e-01, PNorm = 50.0191, GNorm = 0.9285, lr_0 = 8.2788e-04
Loss = 2.3162e-01, PNorm = 50.0342, GNorm = 0.6745, lr_0 = 8.3163e-04
Loss = 2.1485e-01, PNorm = 50.0575, GNorm = 1.6135, lr_0 = 8.3538e-04
Loss = 2.5796e-01, PNorm = 50.0768, GNorm = 2.1549, lr_0 = 8.3913e-04
Loss = 2.4055e-01, PNorm = 50.0999, GNorm = 5.8963, lr_0 = 8.4288e-04
Loss = 2.6340e-01, PNorm = 50.1218, GNorm = 2.9052, lr_0 = 8.4663e-04
Loss = 2.5604e-01, PNorm = 50.1433, GNorm = 0.9006, lr_0 = 8.5038e-04
Loss = 2.2924e-01, PNorm = 50.1670, GNorm = 1.0335, lr_0 = 8.5413e-04
Loss = 2.8823e-01, PNorm = 50.1810, GNorm = 1.5210, lr_0 = 8.5788e-04
Loss = 2.6064e-01, PNorm = 50.1976, GNorm = 1.4125, lr_0 = 8.6163e-04
Loss = 3.4270e-01, PNorm = 50.2208, GNorm = 4.3898, lr_0 = 8.6538e-04
Loss = 2.9073e-01, PNorm = 50.2533, GNorm = 4.5538, lr_0 = 8.6913e-04
Loss = 2.8269e-01, PNorm = 50.2826, GNorm = 1.4487, lr_0 = 8.7288e-04
Loss = 2.5053e-01, PNorm = 50.3152, GNorm = 2.5117, lr_0 = 8.7663e-04
Loss = 2.4218e-01, PNorm = 50.3365, GNorm = 0.8666, lr_0 = 8.8038e-04
Loss = 2.2611e-01, PNorm = 50.3531, GNorm = 5.1999, lr_0 = 8.8413e-04
Loss = 2.4069e-01, PNorm = 50.3684, GNorm = 3.4244, lr_0 = 8.8788e-04
Loss = 2.3018e-01, PNorm = 50.3910, GNorm = 2.2405, lr_0 = 8.9163e-04
Loss = 2.7263e-01, PNorm = 50.4045, GNorm = 2.2417, lr_0 = 8.9538e-04
Loss = 2.7801e-01, PNorm = 50.4286, GNorm = 4.5796, lr_0 = 8.9913e-04
Loss = 2.8004e-01, PNorm = 50.4628, GNorm = 4.9471, lr_0 = 9.0288e-04
Loss = 2.6112e-01, PNorm = 50.4891, GNorm = 2.6477, lr_0 = 9.0663e-04
Loss = 2.6781e-01, PNorm = 50.5219, GNorm = 1.7085, lr_0 = 9.1038e-04
Loss = 2.5534e-01, PNorm = 50.5390, GNorm = 0.7366, lr_0 = 9.1413e-04
Loss = 2.4035e-01, PNorm = 50.5621, GNorm = 2.1758, lr_0 = 9.1788e-04
Loss = 2.6254e-01, PNorm = 50.5812, GNorm = 1.6827, lr_0 = 9.2163e-04
Loss = 2.4238e-01, PNorm = 50.6034, GNorm = 2.7143, lr_0 = 9.2538e-04
Loss = 2.5806e-01, PNorm = 50.6254, GNorm = 0.7848, lr_0 = 9.2913e-04
Loss = 2.4482e-01, PNorm = 50.6455, GNorm = 1.2521, lr_0 = 9.3288e-04
Loss = 2.2419e-01, PNorm = 50.6698, GNorm = 3.6011, lr_0 = 9.3663e-04
Loss = 2.2550e-01, PNorm = 50.6851, GNorm = 1.5398, lr_0 = 9.4038e-04
Loss = 2.5788e-01, PNorm = 50.7180, GNorm = 1.2888, lr_0 = 9.4413e-04
Loss = 2.3366e-01, PNorm = 50.7401, GNorm = 0.8997, lr_0 = 9.4788e-04
Loss = 2.5235e-01, PNorm = 50.7588, GNorm = 2.6504, lr_0 = 9.5163e-04
Loss = 2.5969e-01, PNorm = 50.7905, GNorm = 1.3733, lr_0 = 9.5538e-04
Loss = 3.1038e-01, PNorm = 50.8161, GNorm = 4.4284, lr_0 = 9.5913e-04
Loss = 2.9172e-01, PNorm = 50.8483, GNorm = 2.5571, lr_0 = 9.6288e-04
Loss = 2.8648e-01, PNorm = 50.8740, GNorm = 0.8204, lr_0 = 9.6663e-04
Loss = 2.4913e-01, PNorm = 50.9090, GNorm = 3.3413, lr_0 = 9.7038e-04
Loss = 2.9074e-01, PNorm = 50.9354, GNorm = 3.1462, lr_0 = 9.7413e-04
Loss = 2.7242e-01, PNorm = 50.9586, GNorm = 2.4153, lr_0 = 9.7788e-04
Loss = 2.6478e-01, PNorm = 50.9881, GNorm = 3.5626, lr_0 = 9.8163e-04
Loss = 2.7268e-01, PNorm = 51.0205, GNorm = 1.0028, lr_0 = 9.8537e-04
Loss = 2.5882e-01, PNorm = 51.0415, GNorm = 1.0030, lr_0 = 9.8912e-04
Loss = 2.5031e-01, PNorm = 51.0737, GNorm = 3.3940, lr_0 = 9.9288e-04
Loss = 2.2979e-01, PNorm = 51.0899, GNorm = 1.2897, lr_0 = 9.9663e-04
Loss = 2.2486e-01, PNorm = 51.1119, GNorm = 0.9518, lr_0 = 9.9993e-04
Validation mae = 0.286986
Epoch 2
Loss = 2.4499e-01, PNorm = 51.1308, GNorm = 1.5344, lr_0 = 9.9925e-04
Loss = 2.0720e-01, PNorm = 51.1498, GNorm = 1.0997, lr_0 = 9.9856e-04
Loss = 2.3333e-01, PNorm = 51.1767, GNorm = 1.2178, lr_0 = 9.9788e-04
Loss = 2.2524e-01, PNorm = 51.2109, GNorm = 0.9991, lr_0 = 9.9719e-04
Loss = 2.2964e-01, PNorm = 51.2379, GNorm = 1.1788, lr_0 = 9.9651e-04
Loss = 2.2030e-01, PNorm = 51.2614, GNorm = 1.8856, lr_0 = 9.9583e-04
Loss = 2.2169e-01, PNorm = 51.2854, GNorm = 2.6306, lr_0 = 9.9515e-04
Loss = 2.5969e-01, PNorm = 51.3143, GNorm = 2.3088, lr_0 = 9.9446e-04
Loss = 2.1588e-01, PNorm = 51.3437, GNorm = 0.9286, lr_0 = 9.9378e-04
Loss = 2.3589e-01, PNorm = 51.3742, GNorm = 1.3283, lr_0 = 9.9310e-04
Loss = 2.4228e-01, PNorm = 51.4020, GNorm = 1.5964, lr_0 = 9.9242e-04
Loss = 2.4001e-01, PNorm = 51.4348, GNorm = 4.4918, lr_0 = 9.9174e-04
Loss = 2.6759e-01, PNorm = 51.4682, GNorm = 3.2395, lr_0 = 9.9106e-04
Loss = 2.3131e-01, PNorm = 51.4998, GNorm = 0.6604, lr_0 = 9.9038e-04
Loss = 2.2183e-01, PNorm = 51.5285, GNorm = 2.0484, lr_0 = 9.8971e-04
Loss = 2.5350e-01, PNorm = 51.5514, GNorm = 1.3771, lr_0 = 9.8903e-04
Loss = 2.2693e-01, PNorm = 51.5782, GNorm = 0.8852, lr_0 = 9.8835e-04
Loss = 2.1604e-01, PNorm = 51.6055, GNorm = 1.5050, lr_0 = 9.8767e-04
Loss = 2.3542e-01, PNorm = 51.6384, GNorm = 3.8979, lr_0 = 9.8700e-04
Loss = 2.7557e-01, PNorm = 51.6659, GNorm = 2.3932, lr_0 = 9.8632e-04
Loss = 2.3865e-01, PNorm = 51.6977, GNorm = 0.8337, lr_0 = 9.8564e-04
Loss = 2.1332e-01, PNorm = 51.7251, GNorm = 2.5905, lr_0 = 9.8497e-04
Loss = 2.3143e-01, PNorm = 51.7570, GNorm = 3.0492, lr_0 = 9.8429e-04
Loss = 2.2597e-01, PNorm = 51.7911, GNorm = 1.7720, lr_0 = 9.8362e-04
Loss = 2.4530e-01, PNorm = 51.8225, GNorm = 1.2814, lr_0 = 9.8295e-04
Loss = 2.3901e-01, PNorm = 51.8547, GNorm = 1.8080, lr_0 = 9.8227e-04
Loss = 2.1606e-01, PNorm = 51.8841, GNorm = 1.2192, lr_0 = 9.8160e-04
Loss = 2.4339e-01, PNorm = 51.9126, GNorm = 5.9785, lr_0 = 9.8093e-04
Loss = 2.3866e-01, PNorm = 51.9473, GNorm = 2.4102, lr_0 = 9.8026e-04
Loss = 2.4462e-01, PNorm = 51.9797, GNorm = 0.8956, lr_0 = 9.7958e-04
Loss = 2.4742e-01, PNorm = 52.0045, GNorm = 0.9074, lr_0 = 9.7891e-04
Loss = 2.7693e-01, PNorm = 52.0382, GNorm = 4.4123, lr_0 = 9.7824e-04
Loss = 2.9500e-01, PNorm = 52.0557, GNorm = 1.3616, lr_0 = 9.7757e-04
Loss = 2.5921e-01, PNorm = 52.0878, GNorm = 0.9109, lr_0 = 9.7690e-04
Loss = 2.3440e-01, PNorm = 52.1140, GNorm = 2.0603, lr_0 = 9.7623e-04
Loss = 2.2610e-01, PNorm = 52.1335, GNorm = 1.8094, lr_0 = 9.7556e-04
Loss = 2.3573e-01, PNorm = 52.1622, GNorm = 1.4980, lr_0 = 9.7490e-04
Loss = 2.2315e-01, PNorm = 52.1820, GNorm = 0.9188, lr_0 = 9.7423e-04
Loss = 2.1177e-01, PNorm = 52.2061, GNorm = 1.3172, lr_0 = 9.7356e-04
Loss = 2.0240e-01, PNorm = 52.2178, GNorm = 1.0628, lr_0 = 9.7289e-04
Loss = 2.0198e-01, PNorm = 52.2389, GNorm = 2.0659, lr_0 = 9.7223e-04
Loss = 2.4326e-01, PNorm = 52.2603, GNorm = 0.7764, lr_0 = 9.7156e-04
Loss = 2.3578e-01, PNorm = 52.2842, GNorm = 1.3845, lr_0 = 9.7090e-04
Loss = 2.1654e-01, PNorm = 52.3111, GNorm = 1.3872, lr_0 = 9.7023e-04
Loss = 2.2923e-01, PNorm = 52.3422, GNorm = 1.0437, lr_0 = 9.6957e-04
Loss = 2.3005e-01, PNorm = 52.3624, GNorm = 0.7623, lr_0 = 9.6890e-04
Loss = 2.0230e-01, PNorm = 52.3897, GNorm = 1.4361, lr_0 = 9.6824e-04
Loss = 2.3304e-01, PNorm = 52.4190, GNorm = 2.3691, lr_0 = 9.6757e-04
Loss = 2.5552e-01, PNorm = 52.4283, GNorm = 1.7869, lr_0 = 9.6691e-04
Loss = 2.1642e-01, PNorm = 52.4552, GNorm = 1.4743, lr_0 = 9.6625e-04
Loss = 2.2270e-01, PNorm = 52.4886, GNorm = 0.7265, lr_0 = 9.6559e-04
Loss = 2.2634e-01, PNorm = 52.5072, GNorm = 1.0266, lr_0 = 9.6493e-04
Loss = 2.2859e-01, PNorm = 52.5308, GNorm = 0.9189, lr_0 = 9.6427e-04
Loss = 2.1786e-01, PNorm = 52.5555, GNorm = 1.6949, lr_0 = 9.6360e-04
Loss = 2.2306e-01, PNorm = 52.5849, GNorm = 2.4667, lr_0 = 9.6294e-04
Loss = 2.2142e-01, PNorm = 52.6086, GNorm = 2.7618, lr_0 = 9.6228e-04
Loss = 2.2659e-01, PNorm = 52.6310, GNorm = 1.3475, lr_0 = 9.6163e-04
Loss = 2.2238e-01, PNorm = 52.6495, GNorm = 0.8719, lr_0 = 9.6097e-04
Loss = 2.2726e-01, PNorm = 52.6726, GNorm = 3.5678, lr_0 = 9.6031e-04
Loss = 2.0599e-01, PNorm = 52.6919, GNorm = 2.0324, lr_0 = 9.5965e-04
Loss = 2.0500e-01, PNorm = 52.7166, GNorm = 1.3779, lr_0 = 9.5899e-04
Loss = 2.0040e-01, PNorm = 52.7440, GNorm = 1.5144, lr_0 = 9.5834e-04
Loss = 2.4814e-01, PNorm = 52.7667, GNorm = 1.3005, lr_0 = 9.5768e-04
Loss = 2.2888e-01, PNorm = 52.7935, GNorm = 2.5295, lr_0 = 9.5702e-04
Loss = 2.2942e-01, PNorm = 52.8176, GNorm = 1.7661, lr_0 = 9.5637e-04
Loss = 1.9352e-01, PNorm = 52.8414, GNorm = 1.1600, lr_0 = 9.5571e-04
Loss = 2.3877e-01, PNorm = 52.8757, GNorm = 2.5817, lr_0 = 9.5506e-04
Loss = 2.4528e-01, PNorm = 52.9044, GNorm = 1.0077, lr_0 = 9.5440e-04
Loss = 2.3340e-01, PNorm = 52.9255, GNorm = 1.1691, lr_0 = 9.5375e-04
Loss = 2.1597e-01, PNorm = 52.9527, GNorm = 1.2689, lr_0 = 9.5310e-04
Loss = 2.5617e-01, PNorm = 52.9769, GNorm = 1.1445, lr_0 = 9.5244e-04
Loss = 2.3033e-01, PNorm = 53.0006, GNorm = 1.2159, lr_0 = 9.5179e-04
Loss = 2.5165e-01, PNorm = 53.0307, GNorm = 1.4603, lr_0 = 9.5114e-04
Loss = 2.5227e-01, PNorm = 53.0677, GNorm = 4.0884, lr_0 = 9.5049e-04
Loss = 2.5597e-01, PNorm = 53.0933, GNorm = 0.8172, lr_0 = 9.4984e-04
Loss = 2.5333e-01, PNorm = 53.1345, GNorm = 2.4365, lr_0 = 9.4919e-04
Loss = 2.0686e-01, PNorm = 53.1671, GNorm = 0.9079, lr_0 = 9.4854e-04
Loss = 2.1797e-01, PNorm = 53.1939, GNorm = 0.5885, lr_0 = 9.4789e-04
Loss = 2.5474e-01, PNorm = 53.2233, GNorm = 3.8679, lr_0 = 9.4724e-04
Loss = 2.1795e-01, PNorm = 53.2546, GNorm = 0.6817, lr_0 = 9.4659e-04
Loss = 2.3776e-01, PNorm = 53.2870, GNorm = 2.6215, lr_0 = 9.4594e-04
Loss = 2.1972e-01, PNorm = 53.3060, GNorm = 0.9231, lr_0 = 9.4529e-04
Loss = 2.5619e-01, PNorm = 53.3369, GNorm = 3.3577, lr_0 = 9.4464e-04
Loss = 2.2562e-01, PNorm = 53.3720, GNorm = 1.4927, lr_0 = 9.4400e-04
Loss = 2.1031e-01, PNorm = 53.4060, GNorm = 0.6249, lr_0 = 9.4335e-04
Loss = 2.1237e-01, PNorm = 53.4337, GNorm = 1.1183, lr_0 = 9.4270e-04
Loss = 2.2742e-01, PNorm = 53.4520, GNorm = 1.3986, lr_0 = 9.4206e-04
Loss = 2.1156e-01, PNorm = 53.4767, GNorm = 1.1678, lr_0 = 9.4141e-04
Loss = 2.1419e-01, PNorm = 53.4979, GNorm = 0.8548, lr_0 = 9.4077e-04
Loss = 2.0858e-01, PNorm = 53.5182, GNorm = 0.6504, lr_0 = 9.4012e-04
Loss = 2.2428e-01, PNorm = 53.5407, GNorm = 0.9715, lr_0 = 9.3948e-04
Loss = 2.2161e-01, PNorm = 53.5555, GNorm = 1.8770, lr_0 = 9.3884e-04
Loss = 2.3697e-01, PNorm = 53.5808, GNorm = 2.0149, lr_0 = 9.3819e-04
Loss = 2.4208e-01, PNorm = 53.5999, GNorm = 1.2739, lr_0 = 9.3755e-04
Loss = 2.1956e-01, PNorm = 53.6311, GNorm = 1.6970, lr_0 = 9.3691e-04
Loss = 2.1463e-01, PNorm = 53.6426, GNorm = 2.8817, lr_0 = 9.3627e-04
Loss = 2.1569e-01, PNorm = 53.6604, GNorm = 0.7677, lr_0 = 9.3562e-04
Loss = 2.1595e-01, PNorm = 53.6946, GNorm = 2.7516, lr_0 = 9.3498e-04
Loss = 2.4080e-01, PNorm = 53.7253, GNorm = 1.2559, lr_0 = 9.3434e-04
Loss = 2.1773e-01, PNorm = 53.7518, GNorm = 1.2382, lr_0 = 9.3370e-04
Loss = 2.3799e-01, PNorm = 53.7823, GNorm = 1.4040, lr_0 = 9.3306e-04
Loss = 2.2167e-01, PNorm = 53.8046, GNorm = 1.2163, lr_0 = 9.3242e-04
Loss = 2.1028e-01, PNorm = 53.8305, GNorm = 1.0172, lr_0 = 9.3178e-04
Loss = 2.3127e-01, PNorm = 53.8528, GNorm = 1.4103, lr_0 = 9.3115e-04
Loss = 2.3885e-01, PNorm = 53.8791, GNorm = 3.1724, lr_0 = 9.3051e-04
Loss = 2.6166e-01, PNorm = 53.9001, GNorm = 1.5906, lr_0 = 9.2987e-04
Loss = 2.1811e-01, PNorm = 53.9329, GNorm = 1.1074, lr_0 = 9.2923e-04
Loss = 2.3622e-01, PNorm = 53.9524, GNorm = 1.3367, lr_0 = 9.2860e-04
Loss = 2.1993e-01, PNorm = 53.9911, GNorm = 1.8452, lr_0 = 9.2796e-04
Loss = 2.5532e-01, PNorm = 54.0126, GNorm = 3.3466, lr_0 = 9.2733e-04
Loss = 2.3812e-01, PNorm = 54.0533, GNorm = 2.1434, lr_0 = 9.2669e-04
Loss = 2.1737e-01, PNorm = 54.0749, GNorm = 2.6888, lr_0 = 9.2606e-04
Loss = 2.4461e-01, PNorm = 54.1003, GNorm = 2.1218, lr_0 = 9.2542e-04
Loss = 2.1545e-01, PNorm = 54.1303, GNorm = 0.9249, lr_0 = 9.2479e-04
Loss = 2.0980e-01, PNorm = 54.1588, GNorm = 1.1707, lr_0 = 9.2415e-04
Loss = 2.2340e-01, PNorm = 54.1715, GNorm = 0.8725, lr_0 = 9.2352e-04
Loss = 2.2032e-01, PNorm = 54.1915, GNorm = 1.1821, lr_0 = 9.2289e-04
Loss = 2.0019e-01, PNorm = 54.2160, GNorm = 0.9429, lr_0 = 9.2226e-04
Loss = 2.1844e-01, PNorm = 54.2448, GNorm = 0.7357, lr_0 = 9.2162e-04
Loss = 2.1536e-01, PNorm = 54.2700, GNorm = 0.7345, lr_0 = 9.2099e-04
Validation mae = 0.288711
Epoch 3
Loss = 2.2071e-01, PNorm = 54.3015, GNorm = 2.0660, lr_0 = 9.2036e-04
Loss = 2.1174e-01, PNorm = 54.3309, GNorm = 1.1194, lr_0 = 9.1973e-04
Loss = 2.0976e-01, PNorm = 54.3486, GNorm = 2.9460, lr_0 = 9.1910e-04
Loss = 1.9361e-01, PNorm = 54.3763, GNorm = 1.1144, lr_0 = 9.1847e-04
Loss = 1.7944e-01, PNorm = 54.4035, GNorm = 2.0273, lr_0 = 9.1784e-04
Loss = 2.0568e-01, PNorm = 54.4320, GNorm = 0.7964, lr_0 = 9.1721e-04
Loss = 1.8864e-01, PNorm = 54.4585, GNorm = 0.7524, lr_0 = 9.1658e-04
Loss = 2.0479e-01, PNorm = 54.4810, GNorm = 2.6498, lr_0 = 9.1596e-04
Loss = 2.4502e-01, PNorm = 54.5184, GNorm = 1.2761, lr_0 = 9.1533e-04
Loss = 2.1087e-01, PNorm = 54.5409, GNorm = 1.0307, lr_0 = 9.1470e-04
Loss = 2.4120e-01, PNorm = 54.5688, GNorm = 0.6802, lr_0 = 9.1408e-04
Loss = 2.2148e-01, PNorm = 54.5991, GNorm = 0.8458, lr_0 = 9.1345e-04
Loss = 2.1815e-01, PNorm = 54.6225, GNorm = 2.9511, lr_0 = 9.1282e-04
Loss = 2.0795e-01, PNorm = 54.6511, GNorm = 0.6086, lr_0 = 9.1220e-04
Loss = 1.9521e-01, PNorm = 54.6760, GNorm = 2.1490, lr_0 = 9.1157e-04
Loss = 2.2900e-01, PNorm = 54.7049, GNorm = 0.8001, lr_0 = 9.1095e-04
Loss = 2.1971e-01, PNorm = 54.7411, GNorm = 1.3933, lr_0 = 9.1032e-04
Loss = 2.2041e-01, PNorm = 54.7690, GNorm = 0.5069, lr_0 = 9.0970e-04
Loss = 2.0967e-01, PNorm = 54.7955, GNorm = 0.6607, lr_0 = 9.0908e-04
Loss = 2.2139e-01, PNorm = 54.8158, GNorm = 0.8273, lr_0 = 9.0846e-04
Loss = 2.3218e-01, PNorm = 54.8470, GNorm = 1.0627, lr_0 = 9.0783e-04
Loss = 2.0170e-01, PNorm = 54.8709, GNorm = 1.1313, lr_0 = 9.0721e-04
Loss = 1.9874e-01, PNorm = 54.8960, GNorm = 1.2208, lr_0 = 9.0659e-04
Loss = 2.3218e-01, PNorm = 54.9171, GNorm = 1.3460, lr_0 = 9.0597e-04
Loss = 1.9089e-01, PNorm = 54.9415, GNorm = 0.9472, lr_0 = 9.0535e-04
Loss = 2.0102e-01, PNorm = 54.9634, GNorm = 1.0386, lr_0 = 9.0473e-04
Loss = 2.0545e-01, PNorm = 54.9956, GNorm = 0.7958, lr_0 = 9.0411e-04
Loss = 2.1169e-01, PNorm = 55.0168, GNorm = 1.2763, lr_0 = 9.0349e-04
Loss = 2.0806e-01, PNorm = 55.0366, GNorm = 1.6769, lr_0 = 9.0287e-04
Loss = 2.2828e-01, PNorm = 55.0779, GNorm = 1.0347, lr_0 = 9.0225e-04
Loss = 2.0483e-01, PNorm = 55.1040, GNorm = 0.8187, lr_0 = 9.0163e-04
Loss = 2.1914e-01, PNorm = 55.1449, GNorm = 1.3293, lr_0 = 9.0102e-04
Loss = 2.2261e-01, PNorm = 55.1764, GNorm = 2.1398, lr_0 = 9.0040e-04
Loss = 2.1295e-01, PNorm = 55.2128, GNorm = 0.9847, lr_0 = 8.9978e-04
Loss = 2.0508e-01, PNorm = 55.2358, GNorm = 0.9848, lr_0 = 8.9916e-04
Loss = 2.2645e-01, PNorm = 55.2658, GNorm = 1.0264, lr_0 = 8.9855e-04
Loss = 2.2975e-01, PNorm = 55.2936, GNorm = 1.5686, lr_0 = 8.9793e-04
Loss = 2.1982e-01, PNorm = 55.3355, GNorm = 1.7613, lr_0 = 8.9732e-04
Loss = 1.9982e-01, PNorm = 55.3653, GNorm = 2.4785, lr_0 = 8.9670e-04
Loss = 2.2943e-01, PNorm = 55.3997, GNorm = 3.0392, lr_0 = 8.9609e-04
Loss = 2.0597e-01, PNorm = 55.4325, GNorm = 0.9844, lr_0 = 8.9548e-04
Loss = 2.1870e-01, PNorm = 55.4629, GNorm = 0.8740, lr_0 = 8.9486e-04
Loss = 1.9745e-01, PNorm = 55.4907, GNorm = 1.3500, lr_0 = 8.9425e-04
Loss = 2.1923e-01, PNorm = 55.5143, GNorm = 2.3321, lr_0 = 8.9364e-04
Loss = 1.5789e-01, PNorm = 55.5300, GNorm = 1.7252, lr_0 = 8.9302e-04
Loss = 1.9645e-01, PNorm = 55.5562, GNorm = 1.1789, lr_0 = 8.9241e-04
Loss = 1.9250e-01, PNorm = 55.5860, GNorm = 2.2946, lr_0 = 8.9180e-04
Loss = 2.0982e-01, PNorm = 55.6068, GNorm = 0.9813, lr_0 = 8.9119e-04
Loss = 2.0048e-01, PNorm = 55.6289, GNorm = 1.0489, lr_0 = 8.9058e-04
Loss = 2.3142e-01, PNorm = 55.6569, GNorm = 0.9938, lr_0 = 8.8997e-04
Loss = 2.0397e-01, PNorm = 55.6867, GNorm = 2.1691, lr_0 = 8.8936e-04
Loss = 1.7721e-01, PNorm = 55.7237, GNorm = 0.8357, lr_0 = 8.8875e-04
Loss = 2.0883e-01, PNorm = 55.7411, GNorm = 0.5375, lr_0 = 8.8814e-04
Loss = 2.2007e-01, PNorm = 55.7723, GNorm = 0.7800, lr_0 = 8.8753e-04
Loss = 1.9882e-01, PNorm = 55.7900, GNorm = 0.5232, lr_0 = 8.8693e-04
Loss = 2.1049e-01, PNorm = 55.8220, GNorm = 2.1805, lr_0 = 8.8632e-04
Loss = 2.0468e-01, PNorm = 55.8511, GNorm = 2.2084, lr_0 = 8.8571e-04
Loss = 1.9288e-01, PNorm = 55.8825, GNorm = 0.6538, lr_0 = 8.8510e-04
Loss = 1.8753e-01, PNorm = 55.9053, GNorm = 0.9083, lr_0 = 8.8450e-04
Loss = 1.8559e-01, PNorm = 55.9319, GNorm = 0.8633, lr_0 = 8.8389e-04
Loss = 2.1120e-01, PNorm = 55.9584, GNorm = 1.9230, lr_0 = 8.8329e-04
Loss = 2.0717e-01, PNorm = 55.9852, GNorm = 1.1072, lr_0 = 8.8268e-04
Loss = 1.8335e-01, PNorm = 56.0111, GNorm = 0.9067, lr_0 = 8.8208e-04
Loss = 1.8127e-01, PNorm = 56.0362, GNorm = 2.4022, lr_0 = 8.8147e-04
Loss = 2.1234e-01, PNorm = 56.0587, GNorm = 1.2812, lr_0 = 8.8087e-04
Loss = 1.8002e-01, PNorm = 56.0870, GNorm = 1.0650, lr_0 = 8.8026e-04
Loss = 1.9530e-01, PNorm = 56.1103, GNorm = 0.6179, lr_0 = 8.7966e-04
Loss = 1.8652e-01, PNorm = 56.1343, GNorm = 1.3332, lr_0 = 8.7906e-04
Loss = 2.1675e-01, PNorm = 56.1619, GNorm = 3.4374, lr_0 = 8.7846e-04
Loss = 2.1074e-01, PNorm = 56.1919, GNorm = 1.9416, lr_0 = 8.7785e-04
Loss = 1.7651e-01, PNorm = 56.2349, GNorm = 1.1615, lr_0 = 8.7725e-04
Loss = 2.3126e-01, PNorm = 56.2671, GNorm = 1.4404, lr_0 = 8.7665e-04
Loss = 1.9676e-01, PNorm = 56.2980, GNorm = 2.0137, lr_0 = 8.7605e-04
Loss = 1.9688e-01, PNorm = 56.3175, GNorm = 2.1907, lr_0 = 8.7545e-04
Loss = 1.9637e-01, PNorm = 56.3425, GNorm = 0.6716, lr_0 = 8.7485e-04
Loss = 1.8444e-01, PNorm = 56.3673, GNorm = 0.6419, lr_0 = 8.7425e-04
Loss = 2.0220e-01, PNorm = 56.3897, GNorm = 0.8136, lr_0 = 8.7365e-04
Loss = 2.1021e-01, PNorm = 56.4132, GNorm = 1.0374, lr_0 = 8.7306e-04
Loss = 1.8259e-01, PNorm = 56.4431, GNorm = 2.1276, lr_0 = 8.7246e-04
Loss = 2.3800e-01, PNorm = 56.4729, GNorm = 0.9226, lr_0 = 8.7186e-04
Loss = 2.5280e-01, PNorm = 56.5201, GNorm = 1.0496, lr_0 = 8.7126e-04
Loss = 1.9601e-01, PNorm = 56.5521, GNorm = 1.4079, lr_0 = 8.7067e-04
Loss = 2.1091e-01, PNorm = 56.5838, GNorm = 0.6879, lr_0 = 8.7007e-04
Loss = 1.9113e-01, PNorm = 56.6033, GNorm = 0.6926, lr_0 = 8.6947e-04
Loss = 2.1126e-01, PNorm = 56.6255, GNorm = 1.5493, lr_0 = 8.6888e-04
Loss = 2.0099e-01, PNorm = 56.6512, GNorm = 0.5792, lr_0 = 8.6828e-04
Loss = 1.9640e-01, PNorm = 56.6777, GNorm = 0.8729, lr_0 = 8.6769e-04
Loss = 1.9919e-01, PNorm = 56.7101, GNorm = 1.5273, lr_0 = 8.6709e-04
Loss = 2.0921e-01, PNorm = 56.7329, GNorm = 0.8743, lr_0 = 8.6650e-04
Loss = 2.2485e-01, PNorm = 56.7658, GNorm = 1.3473, lr_0 = 8.6590e-04
Loss = 1.7988e-01, PNorm = 56.8051, GNorm = 0.9322, lr_0 = 8.6531e-04
Loss = 1.7625e-01, PNorm = 56.8350, GNorm = 0.6521, lr_0 = 8.6472e-04
Loss = 2.0568e-01, PNorm = 56.8687, GNorm = 0.8728, lr_0 = 8.6413e-04
Loss = 2.1642e-01, PNorm = 56.8866, GNorm = 1.2440, lr_0 = 8.6353e-04
Loss = 1.8679e-01, PNorm = 56.9105, GNorm = 0.6389, lr_0 = 8.6294e-04
Loss = 2.0639e-01, PNorm = 56.9367, GNorm = 0.6979, lr_0 = 8.6235e-04
Loss = 2.1819e-01, PNorm = 56.9609, GNorm = 1.0455, lr_0 = 8.6176e-04
Loss = 2.0469e-01, PNorm = 56.9862, GNorm = 1.5258, lr_0 = 8.6117e-04
Loss = 2.0122e-01, PNorm = 57.0139, GNorm = 0.6298, lr_0 = 8.6058e-04
Loss = 2.0556e-01, PNorm = 57.0391, GNorm = 0.8407, lr_0 = 8.5999e-04
Loss = 1.7955e-01, PNorm = 57.0573, GNorm = 1.7190, lr_0 = 8.5940e-04
Loss = 1.7314e-01, PNorm = 57.0844, GNorm = 1.5459, lr_0 = 8.5881e-04
Loss = 1.9178e-01, PNorm = 57.1106, GNorm = 2.0225, lr_0 = 8.5823e-04
Loss = 1.8579e-01, PNorm = 57.1318, GNorm = 1.6003, lr_0 = 8.5764e-04
Loss = 2.0285e-01, PNorm = 57.1572, GNorm = 1.0158, lr_0 = 8.5705e-04
Loss = 2.1584e-01, PNorm = 57.1843, GNorm = 2.1002, lr_0 = 8.5646e-04
Loss = 2.1324e-01, PNorm = 57.2087, GNorm = 2.6901, lr_0 = 8.5588e-04
Loss = 1.8214e-01, PNorm = 57.2468, GNorm = 0.8050, lr_0 = 8.5529e-04
Loss = 1.9661e-01, PNorm = 57.2753, GNorm = 0.8552, lr_0 = 8.5470e-04
Loss = 2.0858e-01, PNorm = 57.2992, GNorm = 1.0634, lr_0 = 8.5412e-04
Loss = 1.8932e-01, PNorm = 57.3190, GNorm = 1.1999, lr_0 = 8.5353e-04
Loss = 2.0018e-01, PNorm = 57.3411, GNorm = 1.1951, lr_0 = 8.5295e-04
Loss = 2.1714e-01, PNorm = 57.3645, GNorm = 1.2710, lr_0 = 8.5236e-04
Loss = 1.9180e-01, PNorm = 57.3918, GNorm = 0.6857, lr_0 = 8.5178e-04
Loss = 1.9947e-01, PNorm = 57.4162, GNorm = 0.6700, lr_0 = 8.5120e-04
Loss = 1.9703e-01, PNorm = 57.4423, GNorm = 1.8418, lr_0 = 8.5061e-04
Loss = 1.8901e-01, PNorm = 57.4690, GNorm = 2.1136, lr_0 = 8.5003e-04
Loss = 2.2803e-01, PNorm = 57.4947, GNorm = 1.6103, lr_0 = 8.4945e-04
Loss = 2.1898e-01, PNorm = 57.5311, GNorm = 1.2500, lr_0 = 8.4887e-04
Loss = 2.2899e-01, PNorm = 57.5505, GNorm = 1.4660, lr_0 = 8.4828e-04
Validation mae = 0.272124
Epoch 4
Loss = 1.8487e-01, PNorm = 57.5873, GNorm = 0.8631, lr_0 = 8.4770e-04
Loss = 1.8852e-01, PNorm = 57.6132, GNorm = 0.8494, lr_0 = 8.4712e-04
Loss = 1.9205e-01, PNorm = 57.6379, GNorm = 1.7515, lr_0 = 8.4654e-04
Loss = 2.1165e-01, PNorm = 57.6600, GNorm = 1.2171, lr_0 = 8.4596e-04
Loss = 1.7358e-01, PNorm = 57.6851, GNorm = 0.9143, lr_0 = 8.4538e-04
Loss = 1.9387e-01, PNorm = 57.7199, GNorm = 0.7516, lr_0 = 8.4480e-04
Loss = 1.6713e-01, PNorm = 57.7496, GNorm = 0.8051, lr_0 = 8.4423e-04
Loss = 2.1411e-01, PNorm = 57.7755, GNorm = 1.8618, lr_0 = 8.4365e-04
Loss = 1.9583e-01, PNorm = 57.8022, GNorm = 2.4226, lr_0 = 8.4307e-04
Loss = 1.8877e-01, PNorm = 57.8357, GNorm = 1.4349, lr_0 = 8.4249e-04
Loss = 2.2450e-01, PNorm = 57.8600, GNorm = 2.0951, lr_0 = 8.4191e-04
Loss = 1.9381e-01, PNorm = 57.8933, GNorm = 1.3156, lr_0 = 8.4134e-04
Loss = 1.7999e-01, PNorm = 57.9166, GNorm = 1.0175, lr_0 = 8.4076e-04
Loss = 1.8723e-01, PNorm = 57.9385, GNorm = 0.7840, lr_0 = 8.4019e-04
Loss = 1.8523e-01, PNorm = 57.9651, GNorm = 0.5336, lr_0 = 8.3961e-04
Loss = 1.9659e-01, PNorm = 57.9875, GNorm = 0.9502, lr_0 = 8.3903e-04
Loss = 1.8072e-01, PNorm = 58.0149, GNorm = 0.8077, lr_0 = 8.3846e-04
Loss = 1.7509e-01, PNorm = 58.0362, GNorm = 0.8057, lr_0 = 8.3789e-04
Loss = 1.8875e-01, PNorm = 58.0620, GNorm = 0.8554, lr_0 = 8.3731e-04
Loss = 1.6286e-01, PNorm = 58.0882, GNorm = 0.8877, lr_0 = 8.3674e-04
Loss = 1.9883e-01, PNorm = 58.1192, GNorm = 0.9333, lr_0 = 8.3616e-04
Loss = 1.8224e-01, PNorm = 58.1517, GNorm = 1.7745, lr_0 = 8.3559e-04
Loss = 2.0659e-01, PNorm = 58.1816, GNorm = 0.6862, lr_0 = 8.3502e-04
Loss = 2.2013e-01, PNorm = 58.2128, GNorm = 2.4151, lr_0 = 8.3445e-04
Loss = 1.9450e-01, PNorm = 58.2467, GNorm = 1.1516, lr_0 = 8.3388e-04
Loss = 1.9895e-01, PNorm = 58.2759, GNorm = 0.7819, lr_0 = 8.3330e-04
Loss = 2.0318e-01, PNorm = 58.3050, GNorm = 1.6958, lr_0 = 8.3273e-04
Loss = 1.9477e-01, PNorm = 58.3291, GNorm = 0.9426, lr_0 = 8.3216e-04
Loss = 1.6808e-01, PNorm = 58.3551, GNorm = 1.8623, lr_0 = 8.3159e-04
Loss = 1.9843e-01, PNorm = 58.3797, GNorm = 1.0020, lr_0 = 8.3102e-04
Loss = 1.7628e-01, PNorm = 58.4007, GNorm = 0.8125, lr_0 = 8.3045e-04
Loss = 1.8105e-01, PNorm = 58.4211, GNorm = 0.8865, lr_0 = 8.2988e-04
Loss = 1.8034e-01, PNorm = 58.4414, GNorm = 1.4267, lr_0 = 8.2932e-04
Loss = 1.6146e-01, PNorm = 58.4669, GNorm = 0.9154, lr_0 = 8.2875e-04
Loss = 1.9808e-01, PNorm = 58.4862, GNorm = 0.9331, lr_0 = 8.2818e-04
Loss = 2.1674e-01, PNorm = 58.5161, GNorm = 1.4265, lr_0 = 8.2761e-04
Loss = 1.7613e-01, PNorm = 58.5388, GNorm = 1.0343, lr_0 = 8.2705e-04
Loss = 1.9067e-01, PNorm = 58.5627, GNorm = 1.0270, lr_0 = 8.2648e-04
Loss = 1.7966e-01, PNorm = 58.5879, GNorm = 0.9552, lr_0 = 8.2591e-04
Loss = 2.1203e-01, PNorm = 58.6139, GNorm = 1.5451, lr_0 = 8.2535e-04
Loss = 2.0508e-01, PNorm = 58.6519, GNorm = 1.4489, lr_0 = 8.2478e-04
Loss = 1.7104e-01, PNorm = 58.6755, GNorm = 0.8035, lr_0 = 8.2422e-04
Loss = 1.7632e-01, PNorm = 58.7026, GNorm = 0.8523, lr_0 = 8.2365e-04
Loss = 1.9306e-01, PNorm = 58.7259, GNorm = 0.9018, lr_0 = 8.2309e-04
Loss = 1.7530e-01, PNorm = 58.7522, GNorm = 1.6356, lr_0 = 8.2252e-04
Loss = 1.7647e-01, PNorm = 58.7845, GNorm = 1.9433, lr_0 = 8.2196e-04
Loss = 1.9234e-01, PNorm = 58.8177, GNorm = 0.8323, lr_0 = 8.2140e-04
Loss = 1.6751e-01, PNorm = 58.8455, GNorm = 0.8587, lr_0 = 8.2084e-04
Loss = 1.9928e-01, PNorm = 58.8642, GNorm = 1.1830, lr_0 = 8.2027e-04
Loss = 2.0625e-01, PNorm = 58.8932, GNorm = 0.7347, lr_0 = 8.1971e-04
Loss = 2.0591e-01, PNorm = 58.9271, GNorm = 0.7753, lr_0 = 8.1915e-04
Loss = 1.9830e-01, PNorm = 58.9627, GNorm = 0.6712, lr_0 = 8.1859e-04
Loss = 1.9638e-01, PNorm = 58.9886, GNorm = 1.1518, lr_0 = 8.1803e-04
Loss = 1.9966e-01, PNorm = 59.0126, GNorm = 0.9845, lr_0 = 8.1747e-04
Loss = 1.7743e-01, PNorm = 59.0377, GNorm = 0.8300, lr_0 = 8.1691e-04
Loss = 1.9101e-01, PNorm = 59.0549, GNorm = 0.6683, lr_0 = 8.1635e-04
Loss = 1.8926e-01, PNorm = 59.0764, GNorm = 0.8504, lr_0 = 8.1579e-04
Loss = 1.6704e-01, PNorm = 59.0990, GNorm = 0.7800, lr_0 = 8.1523e-04
Loss = 1.9366e-01, PNorm = 59.1292, GNorm = 1.1721, lr_0 = 8.1467e-04
Loss = 1.8569e-01, PNorm = 59.1573, GNorm = 0.9938, lr_0 = 8.1411e-04
Loss = 1.9921e-01, PNorm = 59.1814, GNorm = 1.2090, lr_0 = 8.1355e-04
Loss = 1.9136e-01, PNorm = 59.2020, GNorm = 1.7313, lr_0 = 8.1300e-04
Loss = 1.5741e-01, PNorm = 59.2248, GNorm = 1.0412, lr_0 = 8.1244e-04
Loss = 1.6913e-01, PNorm = 59.2528, GNorm = 0.7519, lr_0 = 8.1188e-04
Loss = 1.9215e-01, PNorm = 59.2794, GNorm = 0.9698, lr_0 = 8.1133e-04
Loss = 1.8066e-01, PNorm = 59.3074, GNorm = 0.5998, lr_0 = 8.1077e-04
Loss = 1.9482e-01, PNorm = 59.3363, GNorm = 1.0143, lr_0 = 8.1022e-04
Loss = 1.7916e-01, PNorm = 59.3699, GNorm = 0.7725, lr_0 = 8.0966e-04
Loss = 1.9603e-01, PNorm = 59.3914, GNorm = 0.7117, lr_0 = 8.0911e-04
Loss = 2.1419e-01, PNorm = 59.4299, GNorm = 1.7440, lr_0 = 8.0855e-04
Loss = 1.7297e-01, PNorm = 59.4563, GNorm = 0.7985, lr_0 = 8.0800e-04
Loss = 1.6806e-01, PNorm = 59.4778, GNorm = 0.6811, lr_0 = 8.0745e-04
Loss = 1.3826e-01, PNorm = 59.4991, GNorm = 0.6959, lr_0 = 8.0689e-04
Loss = 1.7613e-01, PNorm = 59.5207, GNorm = 0.7024, lr_0 = 8.0634e-04
Loss = 1.9436e-01, PNorm = 59.5397, GNorm = 0.8452, lr_0 = 8.0579e-04
Loss = 2.2353e-01, PNorm = 59.5746, GNorm = 1.2781, lr_0 = 8.0523e-04
Loss = 2.4079e-01, PNorm = 59.6155, GNorm = 2.0956, lr_0 = 8.0468e-04
Loss = 2.0885e-01, PNorm = 59.6540, GNorm = 0.7240, lr_0 = 8.0413e-04
Loss = 1.8163e-01, PNorm = 59.6817, GNorm = 1.0402, lr_0 = 8.0358e-04
Loss = 1.8732e-01, PNorm = 59.7037, GNorm = 0.5741, lr_0 = 8.0303e-04
Loss = 1.8185e-01, PNorm = 59.7269, GNorm = 0.8792, lr_0 = 8.0248e-04
Loss = 1.7819e-01, PNorm = 59.7442, GNorm = 1.6622, lr_0 = 8.0193e-04
Loss = 1.9838e-01, PNorm = 59.7698, GNorm = 1.9053, lr_0 = 8.0138e-04
Loss = 1.9751e-01, PNorm = 59.7934, GNorm = 1.3808, lr_0 = 8.0083e-04
Loss = 1.8655e-01, PNorm = 59.8179, GNorm = 1.1398, lr_0 = 8.0028e-04
Loss = 1.9433e-01, PNorm = 59.8399, GNorm = 1.0423, lr_0 = 7.9974e-04
Loss = 2.0583e-01, PNorm = 59.8618, GNorm = 0.6756, lr_0 = 7.9919e-04
Loss = 2.1016e-01, PNorm = 59.8931, GNorm = 2.3822, lr_0 = 7.9864e-04
Loss = 1.9646e-01, PNorm = 59.9185, GNorm = 1.8725, lr_0 = 7.9809e-04
Loss = 1.8927e-01, PNorm = 59.9436, GNorm = 0.8387, lr_0 = 7.9755e-04
Loss = 1.9060e-01, PNorm = 59.9685, GNorm = 1.0114, lr_0 = 7.9700e-04
Loss = 1.6503e-01, PNorm = 59.9924, GNorm = 0.6456, lr_0 = 7.9645e-04
Loss = 1.8791e-01, PNorm = 60.0094, GNorm = 1.2312, lr_0 = 7.9591e-04
Loss = 1.8629e-01, PNorm = 60.0326, GNorm = 1.0848, lr_0 = 7.9536e-04
Loss = 1.9527e-01, PNorm = 60.0572, GNorm = 0.9489, lr_0 = 7.9482e-04
Loss = 1.9225e-01, PNorm = 60.0833, GNorm = 1.0328, lr_0 = 7.9427e-04
Loss = 1.8113e-01, PNorm = 60.1049, GNorm = 0.9302, lr_0 = 7.9373e-04
Loss = 1.9418e-01, PNorm = 60.1306, GNorm = 1.1781, lr_0 = 7.9319e-04
Loss = 1.8209e-01, PNorm = 60.1537, GNorm = 1.4321, lr_0 = 7.9264e-04
Loss = 1.9866e-01, PNorm = 60.1804, GNorm = 1.0544, lr_0 = 7.9210e-04
Loss = 1.8781e-01, PNorm = 60.2002, GNorm = 2.3207, lr_0 = 7.9156e-04
Loss = 1.8578e-01, PNorm = 60.2266, GNorm = 1.0812, lr_0 = 7.9101e-04
Loss = 1.5767e-01, PNorm = 60.2542, GNorm = 1.2962, lr_0 = 7.9047e-04
Loss = 1.6858e-01, PNorm = 60.2767, GNorm = 0.9531, lr_0 = 7.8993e-04
Loss = 1.7465e-01, PNorm = 60.3042, GNorm = 0.8063, lr_0 = 7.8939e-04
Loss = 1.8312e-01, PNorm = 60.3233, GNorm = 0.7831, lr_0 = 7.8885e-04
Loss = 1.9114e-01, PNorm = 60.3409, GNorm = 1.1027, lr_0 = 7.8831e-04
Loss = 1.9295e-01, PNorm = 60.3578, GNorm = 0.9810, lr_0 = 7.8777e-04
Loss = 1.9445e-01, PNorm = 60.3834, GNorm = 1.2746, lr_0 = 7.8723e-04
Loss = 1.8471e-01, PNorm = 60.4106, GNorm = 0.8278, lr_0 = 7.8669e-04
Loss = 1.9770e-01, PNorm = 60.4338, GNorm = 1.1000, lr_0 = 7.8615e-04
Loss = 1.8745e-01, PNorm = 60.4571, GNorm = 0.6084, lr_0 = 7.8561e-04
Loss = 2.1224e-01, PNorm = 60.4811, GNorm = 1.5531, lr_0 = 7.8507e-04
Loss = 1.9114e-01, PNorm = 60.5126, GNorm = 0.8584, lr_0 = 7.8454e-04
Loss = 1.7302e-01, PNorm = 60.5480, GNorm = 0.6413, lr_0 = 7.8400e-04
Loss = 1.7474e-01, PNorm = 60.5745, GNorm = 1.5835, lr_0 = 7.8346e-04
Loss = 1.8315e-01, PNorm = 60.5947, GNorm = 0.9597, lr_0 = 7.8293e-04
Loss = 1.8254e-01, PNorm = 60.6176, GNorm = 0.7166, lr_0 = 7.8239e-04
Loss = 1.7579e-01, PNorm = 60.6393, GNorm = 0.5349, lr_0 = 7.8185e-04
Loss = 1.8313e-01, PNorm = 60.6516, GNorm = 0.8098, lr_0 = 7.8132e-04
Validation mae = 0.260736
Epoch 5
Loss = 1.6965e-01, PNorm = 60.6805, GNorm = 0.9975, lr_0 = 7.8078e-04
Loss = 1.5648e-01, PNorm = 60.7043, GNorm = 0.5548, lr_0 = 7.8025e-04
Loss = 1.6908e-01, PNorm = 60.7301, GNorm = 0.6792, lr_0 = 7.7971e-04
Loss = 1.6379e-01, PNorm = 60.7464, GNorm = 0.9531, lr_0 = 7.7918e-04
Loss = 1.6041e-01, PNorm = 60.7673, GNorm = 0.6768, lr_0 = 7.7864e-04
Loss = 1.7713e-01, PNorm = 60.7937, GNorm = 1.8083, lr_0 = 7.7811e-04
Loss = 1.6102e-01, PNorm = 60.8231, GNorm = 0.6010, lr_0 = 7.7758e-04
Loss = 2.0129e-01, PNorm = 60.8529, GNorm = 1.1045, lr_0 = 7.7705e-04
Loss = 1.6049e-01, PNorm = 60.8790, GNorm = 0.6646, lr_0 = 7.7651e-04
Loss = 1.6258e-01, PNorm = 60.9024, GNorm = 1.0822, lr_0 = 7.7598e-04
Loss = 1.7422e-01, PNorm = 60.9212, GNorm = 1.5226, lr_0 = 7.7545e-04
Loss = 1.8872e-01, PNorm = 60.9444, GNorm = 1.0348, lr_0 = 7.7492e-04
Loss = 1.8651e-01, PNorm = 60.9717, GNorm = 0.9461, lr_0 = 7.7439e-04
Loss = 1.8311e-01, PNorm = 60.9995, GNorm = 1.5105, lr_0 = 7.7386e-04
Loss = 1.5885e-01, PNorm = 61.0291, GNorm = 0.7772, lr_0 = 7.7333e-04
Loss = 1.5207e-01, PNorm = 61.0508, GNorm = 0.4568, lr_0 = 7.7280e-04
Loss = 1.6939e-01, PNorm = 61.0639, GNorm = 1.0557, lr_0 = 7.7227e-04
Loss = 1.9734e-01, PNorm = 61.0930, GNorm = 3.6198, lr_0 = 7.7174e-04
Loss = 1.7405e-01, PNorm = 61.1262, GNorm = 1.0232, lr_0 = 7.7121e-04
Loss = 1.7013e-01, PNorm = 61.1572, GNorm = 0.7386, lr_0 = 7.7068e-04
Loss = 1.9058e-01, PNorm = 61.1765, GNorm = 1.3108, lr_0 = 7.7015e-04
Loss = 1.7152e-01, PNorm = 61.2041, GNorm = 1.5133, lr_0 = 7.6963e-04
Loss = 1.5296e-01, PNorm = 61.2327, GNorm = 0.6045, lr_0 = 7.6910e-04
Loss = 1.6622e-01, PNorm = 61.2558, GNorm = 0.7707, lr_0 = 7.6857e-04
Loss = 1.5032e-01, PNorm = 61.2754, GNorm = 0.7616, lr_0 = 7.6805e-04
Loss = 1.6292e-01, PNorm = 61.2877, GNorm = 1.6007, lr_0 = 7.6752e-04
Loss = 1.5286e-01, PNorm = 61.3130, GNorm = 0.6802, lr_0 = 7.6699e-04
Loss = 1.7869e-01, PNorm = 61.3318, GNorm = 0.4497, lr_0 = 7.6647e-04
Loss = 1.5710e-01, PNorm = 61.3510, GNorm = 1.2760, lr_0 = 7.6594e-04
Loss = 1.5343e-01, PNorm = 61.3683, GNorm = 0.8691, lr_0 = 7.6542e-04
Loss = 1.5969e-01, PNorm = 61.3955, GNorm = 0.8531, lr_0 = 7.6489e-04
Loss = 1.7679e-01, PNorm = 61.4225, GNorm = 1.0502, lr_0 = 7.6437e-04
Loss = 1.7679e-01, PNorm = 61.4455, GNorm = 1.2842, lr_0 = 7.6385e-04
Loss = 1.7550e-01, PNorm = 61.4661, GNorm = 1.4706, lr_0 = 7.6332e-04
Loss = 1.5883e-01, PNorm = 61.4917, GNorm = 1.0741, lr_0 = 7.6280e-04
Loss = 1.7032e-01, PNorm = 61.5151, GNorm = 1.0640, lr_0 = 7.6228e-04
Loss = 1.5910e-01, PNorm = 61.5409, GNorm = 1.1073, lr_0 = 7.6176e-04
Loss = 1.8831e-01, PNorm = 61.5703, GNorm = 1.0533, lr_0 = 7.6123e-04
Loss = 1.9199e-01, PNorm = 61.5931, GNorm = 2.2870, lr_0 = 7.6071e-04
Loss = 2.0108e-01, PNorm = 61.6215, GNorm = 0.8737, lr_0 = 7.6019e-04
Loss = 1.6648e-01, PNorm = 61.6485, GNorm = 0.8260, lr_0 = 7.5967e-04
Loss = 1.6769e-01, PNorm = 61.6740, GNorm = 1.2292, lr_0 = 7.5915e-04
Loss = 1.8760e-01, PNorm = 61.6941, GNorm = 0.7004, lr_0 = 7.5863e-04
Loss = 1.8595e-01, PNorm = 61.7197, GNorm = 1.2987, lr_0 = 7.5811e-04
Loss = 1.6590e-01, PNorm = 61.7406, GNorm = 0.7435, lr_0 = 7.5759e-04
Loss = 1.8235e-01, PNorm = 61.7640, GNorm = 1.4885, lr_0 = 7.5707e-04
Loss = 1.8427e-01, PNorm = 61.8028, GNorm = 1.8706, lr_0 = 7.5655e-04
Loss = 1.9106e-01, PNorm = 61.8259, GNorm = 0.7269, lr_0 = 7.5603e-04
Loss = 1.9042e-01, PNorm = 61.8555, GNorm = 0.8879, lr_0 = 7.5552e-04
Loss = 1.8527e-01, PNorm = 61.8790, GNorm = 0.9691, lr_0 = 7.5500e-04
Loss = 2.0073e-01, PNorm = 61.9052, GNorm = 0.9370, lr_0 = 7.5448e-04
Loss = 1.8179e-01, PNorm = 61.9291, GNorm = 0.9079, lr_0 = 7.5397e-04
Loss = 1.7271e-01, PNorm = 61.9457, GNorm = 1.4887, lr_0 = 7.5345e-04
Loss = 2.2258e-01, PNorm = 61.9691, GNorm = 0.9193, lr_0 = 7.5293e-04
Loss = 1.7089e-01, PNorm = 61.9949, GNorm = 2.1557, lr_0 = 7.5242e-04
Loss = 1.6891e-01, PNorm = 62.0282, GNorm = 1.0833, lr_0 = 7.5190e-04
Loss = 1.5969e-01, PNorm = 62.0549, GNorm = 0.6754, lr_0 = 7.5139e-04
Loss = 1.9117e-01, PNorm = 62.0798, GNorm = 0.8476, lr_0 = 7.5087e-04
Loss = 1.7615e-01, PNorm = 62.1079, GNorm = 1.6715, lr_0 = 7.5036e-04
Loss = 1.8580e-01, PNorm = 62.1283, GNorm = 0.5663, lr_0 = 7.4984e-04
Loss = 1.7564e-01, PNorm = 62.1523, GNorm = 0.5165, lr_0 = 7.4933e-04
Loss = 1.6091e-01, PNorm = 62.1739, GNorm = 0.8456, lr_0 = 7.4882e-04
Loss = 1.6382e-01, PNorm = 62.1940, GNorm = 0.9990, lr_0 = 7.4830e-04
Loss = 1.8352e-01, PNorm = 62.2205, GNorm = 1.0566, lr_0 = 7.4779e-04
Loss = 1.9015e-01, PNorm = 62.2397, GNorm = 0.7843, lr_0 = 7.4728e-04
Loss = 1.8022e-01, PNorm = 62.2594, GNorm = 0.9602, lr_0 = 7.4677e-04
Loss = 1.6266e-01, PNorm = 62.2785, GNorm = 1.1561, lr_0 = 7.4625e-04
Loss = 1.7351e-01, PNorm = 62.3060, GNorm = 1.1000, lr_0 = 7.4574e-04
Loss = 1.7616e-01, PNorm = 62.3312, GNorm = 0.9086, lr_0 = 7.4523e-04
Loss = 1.6336e-01, PNorm = 62.3560, GNorm = 0.6002, lr_0 = 7.4472e-04
Loss = 1.7599e-01, PNorm = 62.3777, GNorm = 1.6162, lr_0 = 7.4421e-04
Loss = 1.7507e-01, PNorm = 62.3985, GNorm = 1.1010, lr_0 = 7.4370e-04
Loss = 1.6540e-01, PNorm = 62.4202, GNorm = 0.7260, lr_0 = 7.4319e-04
Loss = 1.8521e-01, PNorm = 62.4387, GNorm = 1.1315, lr_0 = 7.4268e-04
Loss = 1.7672e-01, PNorm = 62.4611, GNorm = 1.0554, lr_0 = 7.4217e-04
Loss = 1.5191e-01, PNorm = 62.4801, GNorm = 0.6023, lr_0 = 7.4167e-04
Loss = 1.7191e-01, PNorm = 62.5123, GNorm = 1.9656, lr_0 = 7.4116e-04
Loss = 1.8344e-01, PNorm = 62.5451, GNorm = 0.9305, lr_0 = 7.4065e-04
Loss = 1.6598e-01, PNorm = 62.5730, GNorm = 0.8362, lr_0 = 7.4014e-04
Loss = 1.7765e-01, PNorm = 62.5972, GNorm = 1.6050, lr_0 = 7.3964e-04
Loss = 1.6372e-01, PNorm = 62.6169, GNorm = 0.8631, lr_0 = 7.3913e-04
Loss = 1.8285e-01, PNorm = 62.6446, GNorm = 0.7469, lr_0 = 7.3862e-04
Loss = 1.7568e-01, PNorm = 62.6651, GNorm = 0.6280, lr_0 = 7.3812e-04
Loss = 1.8559e-01, PNorm = 62.6963, GNorm = 1.0827, lr_0 = 7.3761e-04
Loss = 2.0829e-01, PNorm = 62.7288, GNorm = 0.7811, lr_0 = 7.3711e-04
Loss = 2.1605e-01, PNorm = 62.7600, GNorm = 1.3784, lr_0 = 7.3660e-04
Loss = 2.1164e-01, PNorm = 62.7928, GNorm = 1.1132, lr_0 = 7.3610e-04
Loss = 1.8095e-01, PNorm = 62.8181, GNorm = 0.6086, lr_0 = 7.3559e-04
Loss = 1.7547e-01, PNorm = 62.8424, GNorm = 0.7388, lr_0 = 7.3509e-04
Loss = 1.8421e-01, PNorm = 62.8648, GNorm = 1.6605, lr_0 = 7.3458e-04
Loss = 1.9247e-01, PNorm = 62.8898, GNorm = 0.7374, lr_0 = 7.3408e-04
Loss = 2.0558e-01, PNorm = 62.9149, GNorm = 1.0569, lr_0 = 7.3358e-04
Loss = 1.6428e-01, PNorm = 62.9437, GNorm = 1.7891, lr_0 = 7.3308e-04
Loss = 1.7576e-01, PNorm = 62.9729, GNorm = 1.7384, lr_0 = 7.3257e-04
Loss = 1.9333e-01, PNorm = 63.0071, GNorm = 0.8496, lr_0 = 7.3207e-04
Loss = 1.7901e-01, PNorm = 63.0386, GNorm = 0.7925, lr_0 = 7.3157e-04
Loss = 1.7284e-01, PNorm = 63.0596, GNorm = 0.6363, lr_0 = 7.3107e-04
Loss = 1.7201e-01, PNorm = 63.0783, GNorm = 1.3842, lr_0 = 7.3057e-04
Loss = 1.9414e-01, PNorm = 63.0995, GNorm = 0.9310, lr_0 = 7.3007e-04
Loss = 1.6339e-01, PNorm = 63.1217, GNorm = 0.7340, lr_0 = 7.2957e-04
Loss = 1.7273e-01, PNorm = 63.1434, GNorm = 0.8747, lr_0 = 7.2907e-04
Loss = 1.7677e-01, PNorm = 63.1768, GNorm = 0.8132, lr_0 = 7.2857e-04
Loss = 1.6044e-01, PNorm = 63.2085, GNorm = 0.5566, lr_0 = 7.2807e-04
Loss = 1.7293e-01, PNorm = 63.2264, GNorm = 0.9846, lr_0 = 7.2757e-04
Loss = 1.6243e-01, PNorm = 63.2478, GNorm = 0.6434, lr_0 = 7.2707e-04
Loss = 1.6328e-01, PNorm = 63.2640, GNorm = 0.6661, lr_0 = 7.2657e-04
Loss = 1.6751e-01, PNorm = 63.2862, GNorm = 0.6664, lr_0 = 7.2608e-04
Loss = 1.8217e-01, PNorm = 63.3101, GNorm = 1.0484, lr_0 = 7.2558e-04
Loss = 1.7831e-01, PNorm = 63.3432, GNorm = 0.5675, lr_0 = 7.2508e-04
Loss = 1.6769e-01, PNorm = 63.3631, GNorm = 0.8231, lr_0 = 7.2458e-04
Loss = 1.8296e-01, PNorm = 63.3894, GNorm = 0.7926, lr_0 = 7.2409e-04
Loss = 1.7390e-01, PNorm = 63.4069, GNorm = 0.7804, lr_0 = 7.2359e-04
Loss = 1.6017e-01, PNorm = 63.4232, GNorm = 1.0124, lr_0 = 7.2310e-04
Loss = 1.8232e-01, PNorm = 63.4422, GNorm = 0.5888, lr_0 = 7.2260e-04
Loss = 1.7118e-01, PNorm = 63.4698, GNorm = 0.6730, lr_0 = 7.2211e-04
Loss = 2.0941e-01, PNorm = 63.4964, GNorm = 1.8323, lr_0 = 7.2161e-04
Loss = 1.9098e-01, PNorm = 63.5184, GNorm = 1.3691, lr_0 = 7.2112e-04
Loss = 1.8010e-01, PNorm = 63.5381, GNorm = 0.8495, lr_0 = 7.2062e-04
Loss = 1.8622e-01, PNorm = 63.5667, GNorm = 0.7799, lr_0 = 7.2013e-04
Loss = 1.9831e-01, PNorm = 63.5901, GNorm = 0.9613, lr_0 = 7.1964e-04
Validation mae = 0.260420
Epoch 6
Loss = 1.7208e-01, PNorm = 63.6196, GNorm = 0.6756, lr_0 = 7.1914e-04
Loss = 1.7455e-01, PNorm = 63.6488, GNorm = 0.9742, lr_0 = 7.1865e-04
Loss = 1.7230e-01, PNorm = 63.6698, GNorm = 0.5698, lr_0 = 7.1816e-04
Loss = 1.6397e-01, PNorm = 63.6964, GNorm = 0.7167, lr_0 = 7.1767e-04
Loss = 1.7208e-01, PNorm = 63.7210, GNorm = 0.7827, lr_0 = 7.1717e-04
Loss = 1.5615e-01, PNorm = 63.7490, GNorm = 0.9204, lr_0 = 7.1668e-04
Loss = 1.8961e-01, PNorm = 63.7730, GNorm = 1.3896, lr_0 = 7.1619e-04
Loss = 1.4860e-01, PNorm = 63.7918, GNorm = 0.8269, lr_0 = 7.1570e-04
Loss = 1.7997e-01, PNorm = 63.8169, GNorm = 1.0989, lr_0 = 7.1521e-04
Loss = 1.5109e-01, PNorm = 63.8410, GNorm = 0.5433, lr_0 = 7.1472e-04
Loss = 1.6904e-01, PNorm = 63.8628, GNorm = 0.7071, lr_0 = 7.1423e-04
Loss = 1.7805e-01, PNorm = 63.8881, GNorm = 1.1203, lr_0 = 7.1374e-04
Loss = 1.6770e-01, PNorm = 63.9178, GNorm = 0.6033, lr_0 = 7.1325e-04
Loss = 1.7114e-01, PNorm = 63.9444, GNorm = 2.0122, lr_0 = 7.1277e-04
Loss = 1.7263e-01, PNorm = 63.9784, GNorm = 1.3379, lr_0 = 7.1228e-04
Loss = 1.9062e-01, PNorm = 63.9977, GNorm = 2.1714, lr_0 = 7.1179e-04
Loss = 2.0936e-01, PNorm = 64.0188, GNorm = 1.7489, lr_0 = 7.1130e-04
Loss = 1.7232e-01, PNorm = 64.0405, GNorm = 0.7499, lr_0 = 7.1081e-04
Loss = 1.6827e-01, PNorm = 64.0718, GNorm = 1.4616, lr_0 = 7.1033e-04
Loss = 1.7567e-01, PNorm = 64.1056, GNorm = 0.9705, lr_0 = 7.0984e-04
Loss = 1.5242e-01, PNorm = 64.1403, GNorm = 0.6986, lr_0 = 7.0935e-04
Loss = 1.5221e-01, PNorm = 64.1508, GNorm = 0.6788, lr_0 = 7.0887e-04
Loss = 1.7077e-01, PNorm = 64.1719, GNorm = 0.5925, lr_0 = 7.0838e-04
Loss = 1.6032e-01, PNorm = 64.1920, GNorm = 0.7011, lr_0 = 7.0790e-04
Loss = 1.7123e-01, PNorm = 64.2113, GNorm = 0.9733, lr_0 = 7.0741e-04
Loss = 1.6802e-01, PNorm = 64.2322, GNorm = 1.1150, lr_0 = 7.0693e-04
Loss = 1.7261e-01, PNorm = 64.2514, GNorm = 1.0014, lr_0 = 7.0644e-04
Loss = 1.8059e-01, PNorm = 64.2747, GNorm = 1.0196, lr_0 = 7.0596e-04
Loss = 1.9731e-01, PNorm = 64.2989, GNorm = 0.8668, lr_0 = 7.0548e-04
Loss = 1.7195e-01, PNorm = 64.3299, GNorm = 0.7312, lr_0 = 7.0499e-04
Loss = 1.5796e-01, PNorm = 64.3550, GNorm = 0.5883, lr_0 = 7.0451e-04
Loss = 1.4170e-01, PNorm = 64.3789, GNorm = 1.2722, lr_0 = 7.0403e-04
Loss = 1.5046e-01, PNorm = 64.4048, GNorm = 0.6468, lr_0 = 7.0354e-04
Loss = 1.6493e-01, PNorm = 64.4284, GNorm = 0.9000, lr_0 = 7.0306e-04
Loss = 1.6757e-01, PNorm = 64.4493, GNorm = 0.7330, lr_0 = 7.0258e-04
Loss = 1.5894e-01, PNorm = 64.4720, GNorm = 1.0326, lr_0 = 7.0210e-04
Loss = 1.6540e-01, PNorm = 64.4967, GNorm = 1.3908, lr_0 = 7.0162e-04
Loss = 1.7360e-01, PNorm = 64.5194, GNorm = 0.8054, lr_0 = 7.0114e-04
Loss = 1.4401e-01, PNorm = 64.5403, GNorm = 0.5914, lr_0 = 7.0066e-04
Loss = 1.4763e-01, PNorm = 64.5578, GNorm = 0.7741, lr_0 = 7.0018e-04
Loss = 1.6568e-01, PNorm = 64.5777, GNorm = 0.9459, lr_0 = 6.9970e-04
Loss = 1.6044e-01, PNorm = 64.5986, GNorm = 0.6873, lr_0 = 6.9922e-04
Loss = 1.4513e-01, PNorm = 64.6181, GNorm = 0.9783, lr_0 = 6.9874e-04
Loss = 1.6753e-01, PNorm = 64.6409, GNorm = 0.5536, lr_0 = 6.9826e-04
Loss = 1.6277e-01, PNorm = 64.6540, GNorm = 1.5166, lr_0 = 6.9778e-04
Loss = 1.7899e-01, PNorm = 64.6694, GNorm = 0.6736, lr_0 = 6.9730e-04
Loss = 1.7410e-01, PNorm = 64.6938, GNorm = 0.6518, lr_0 = 6.9683e-04
Loss = 1.7291e-01, PNorm = 64.7136, GNorm = 1.6042, lr_0 = 6.9635e-04
Loss = 1.4351e-01, PNorm = 64.7347, GNorm = 0.5024, lr_0 = 6.9587e-04
Loss = 1.5935e-01, PNorm = 64.7554, GNorm = 1.9593, lr_0 = 6.9540e-04
Loss = 1.6569e-01, PNorm = 64.7810, GNorm = 0.6943, lr_0 = 6.9492e-04
Loss = 1.5511e-01, PNorm = 64.7951, GNorm = 0.7933, lr_0 = 6.9444e-04
Loss = 1.6809e-01, PNorm = 64.8169, GNorm = 0.5964, lr_0 = 6.9397e-04
Loss = 1.6214e-01, PNorm = 64.8357, GNorm = 0.6251, lr_0 = 6.9349e-04
Loss = 1.5648e-01, PNorm = 64.8625, GNorm = 1.0258, lr_0 = 6.9302e-04
Loss = 1.7148e-01, PNorm = 64.8899, GNorm = 1.0084, lr_0 = 6.9254e-04
Loss = 1.5609e-01, PNorm = 64.9088, GNorm = 0.9323, lr_0 = 6.9207e-04
Loss = 1.6029e-01, PNorm = 64.9222, GNorm = 0.6052, lr_0 = 6.9159e-04
Loss = 1.5417e-01, PNorm = 64.9374, GNorm = 1.1263, lr_0 = 6.9112e-04
Loss = 1.3790e-01, PNorm = 64.9567, GNorm = 0.4795, lr_0 = 6.9065e-04
Loss = 1.5954e-01, PNorm = 64.9826, GNorm = 0.8823, lr_0 = 6.9017e-04
Loss = 1.5884e-01, PNorm = 65.0147, GNorm = 0.9074, lr_0 = 6.8970e-04
Loss = 1.6735e-01, PNorm = 65.0407, GNorm = 1.4139, lr_0 = 6.8923e-04
Loss = 1.7397e-01, PNorm = 65.0667, GNorm = 1.0960, lr_0 = 6.8876e-04
Loss = 1.5914e-01, PNorm = 65.0903, GNorm = 0.6727, lr_0 = 6.8828e-04
Loss = 1.7391e-01, PNorm = 65.1146, GNorm = 0.9922, lr_0 = 6.8781e-04
Loss = 1.6663e-01, PNorm = 65.1381, GNorm = 1.1464, lr_0 = 6.8734e-04
Loss = 1.6947e-01, PNorm = 65.1555, GNorm = 0.9022, lr_0 = 6.8687e-04
Loss = 1.8075e-01, PNorm = 65.1732, GNorm = 1.2457, lr_0 = 6.8640e-04
Loss = 1.8082e-01, PNorm = 65.1944, GNorm = 0.6990, lr_0 = 6.8593e-04
Loss = 1.7187e-01, PNorm = 65.2224, GNorm = 0.6193, lr_0 = 6.8546e-04
Loss = 1.5856e-01, PNorm = 65.2421, GNorm = 1.1977, lr_0 = 6.8499e-04
Loss = 1.5552e-01, PNorm = 65.2626, GNorm = 0.9124, lr_0 = 6.8452e-04
Loss = 1.7127e-01, PNorm = 65.2803, GNorm = 0.7802, lr_0 = 6.8405e-04
Loss = 1.6553e-01, PNorm = 65.3062, GNorm = 1.1318, lr_0 = 6.8358e-04
Loss = 1.5067e-01, PNorm = 65.3297, GNorm = 1.4072, lr_0 = 6.8312e-04
Loss = 1.9175e-01, PNorm = 65.3537, GNorm = 1.5728, lr_0 = 6.8265e-04
Loss = 1.4595e-01, PNorm = 65.3825, GNorm = 0.6810, lr_0 = 6.8218e-04
Loss = 1.6254e-01, PNorm = 65.4007, GNorm = 0.7095, lr_0 = 6.8171e-04
Loss = 1.6319e-01, PNorm = 65.4261, GNorm = 0.5826, lr_0 = 6.8125e-04
Loss = 1.5648e-01, PNorm = 65.4423, GNorm = 0.6493, lr_0 = 6.8078e-04
Loss = 1.6867e-01, PNorm = 65.4654, GNorm = 1.3323, lr_0 = 6.8031e-04
Loss = 1.6321e-01, PNorm = 65.4909, GNorm = 1.4118, lr_0 = 6.7985e-04
Loss = 1.8324e-01, PNorm = 65.5255, GNorm = 1.4758, lr_0 = 6.7938e-04
Loss = 1.7561e-01, PNorm = 65.5506, GNorm = 0.9860, lr_0 = 6.7892e-04
Loss = 1.7361e-01, PNorm = 65.5814, GNorm = 1.6449, lr_0 = 6.7845e-04
Loss = 1.5581e-01, PNorm = 65.6074, GNorm = 1.0114, lr_0 = 6.7799e-04
Loss = 1.5205e-01, PNorm = 65.6231, GNorm = 0.7439, lr_0 = 6.7752e-04
Loss = 1.6618e-01, PNorm = 65.6345, GNorm = 0.5898, lr_0 = 6.7706e-04
Loss = 1.6739e-01, PNorm = 65.6557, GNorm = 0.7546, lr_0 = 6.7659e-04
Loss = 1.7792e-01, PNorm = 65.6823, GNorm = 0.7559, lr_0 = 6.7613e-04
Loss = 1.7616e-01, PNorm = 65.7155, GNorm = 0.9562, lr_0 = 6.7567e-04
Loss = 1.7197e-01, PNorm = 65.7434, GNorm = 0.8772, lr_0 = 6.7520e-04
Loss = 1.5776e-01, PNorm = 65.7641, GNorm = 0.8124, lr_0 = 6.7474e-04
Loss = 1.8205e-01, PNorm = 65.7760, GNorm = 0.8186, lr_0 = 6.7428e-04
Loss = 1.7301e-01, PNorm = 65.7996, GNorm = 0.6795, lr_0 = 6.7382e-04
Loss = 1.5544e-01, PNorm = 65.8259, GNorm = 0.9186, lr_0 = 6.7335e-04
Loss = 1.7200e-01, PNorm = 65.8420, GNorm = 0.9332, lr_0 = 6.7289e-04
Loss = 1.6641e-01, PNorm = 65.8639, GNorm = 0.9056, lr_0 = 6.7243e-04
Loss = 1.8354e-01, PNorm = 65.8817, GNorm = 1.4432, lr_0 = 6.7197e-04
Loss = 1.6891e-01, PNorm = 65.9099, GNorm = 1.1722, lr_0 = 6.7151e-04
Loss = 1.6551e-01, PNorm = 65.9235, GNorm = 0.5713, lr_0 = 6.7105e-04
Loss = 1.7633e-01, PNorm = 65.9441, GNorm = 0.8032, lr_0 = 6.7059e-04
Loss = 1.6970e-01, PNorm = 65.9678, GNorm = 0.7252, lr_0 = 6.7013e-04
Loss = 1.6140e-01, PNorm = 65.9851, GNorm = 1.5812, lr_0 = 6.6967e-04
Loss = 1.6604e-01, PNorm = 66.0045, GNorm = 1.2129, lr_0 = 6.6921e-04
Loss = 1.6744e-01, PNorm = 66.0246, GNorm = 1.1378, lr_0 = 6.6876e-04
Loss = 1.7806e-01, PNorm = 66.0417, GNorm = 0.9574, lr_0 = 6.6830e-04
Loss = 1.5539e-01, PNorm = 66.0601, GNorm = 0.6100, lr_0 = 6.6784e-04
Loss = 1.8390e-01, PNorm = 66.0833, GNorm = 1.5472, lr_0 = 6.6738e-04
Loss = 1.7231e-01, PNorm = 66.1071, GNorm = 0.7064, lr_0 = 6.6693e-04
Loss = 1.6302e-01, PNorm = 66.1244, GNorm = 0.7568, lr_0 = 6.6647e-04
Loss = 1.6209e-01, PNorm = 66.1323, GNorm = 0.8396, lr_0 = 6.6601e-04
Loss = 1.7886e-01, PNorm = 66.1518, GNorm = 0.7502, lr_0 = 6.6556e-04
Loss = 1.4848e-01, PNorm = 66.1699, GNorm = 0.6219, lr_0 = 6.6510e-04
Loss = 1.6222e-01, PNorm = 66.1912, GNorm = 0.6530, lr_0 = 6.6464e-04
Loss = 1.7382e-01, PNorm = 66.2137, GNorm = 0.8021, lr_0 = 6.6419e-04
Loss = 1.5605e-01, PNorm = 66.2356, GNorm = 2.2228, lr_0 = 6.6373e-04
Loss = 1.5854e-01, PNorm = 66.2620, GNorm = 1.1884, lr_0 = 6.6328e-04
Loss = 1.7563e-01, PNorm = 66.2901, GNorm = 0.8655, lr_0 = 6.6282e-04
Validation mae = 0.281434
Epoch 7
Loss = 1.7861e-01, PNorm = 66.3228, GNorm = 0.5568, lr_0 = 6.6237e-04
Loss = 1.4662e-01, PNorm = 66.3514, GNorm = 1.3784, lr_0 = 6.6192e-04
Loss = 1.6673e-01, PNorm = 66.3762, GNorm = 1.3219, lr_0 = 6.6146e-04
Loss = 1.7018e-01, PNorm = 66.3954, GNorm = 1.3690, lr_0 = 6.6101e-04
Loss = 1.3138e-01, PNorm = 66.4131, GNorm = 0.6256, lr_0 = 6.6056e-04
Loss = 1.4948e-01, PNorm = 66.4322, GNorm = 0.8491, lr_0 = 6.6011e-04
Loss = 1.3564e-01, PNorm = 66.4489, GNorm = 0.5947, lr_0 = 6.5965e-04
Loss = 1.5363e-01, PNorm = 66.4669, GNorm = 0.6793, lr_0 = 6.5920e-04
Loss = 1.5909e-01, PNorm = 66.4880, GNorm = 1.0259, lr_0 = 6.5875e-04
Loss = 1.5429e-01, PNorm = 66.5175, GNorm = 1.0876, lr_0 = 6.5830e-04
Loss = 1.7322e-01, PNorm = 66.5432, GNorm = 0.8426, lr_0 = 6.5785e-04
Loss = 1.4273e-01, PNorm = 66.5668, GNorm = 0.8752, lr_0 = 6.5740e-04
Loss = 1.5483e-01, PNorm = 66.5853, GNorm = 0.7977, lr_0 = 6.5695e-04
Loss = 1.6340e-01, PNorm = 66.5982, GNorm = 0.8359, lr_0 = 6.5650e-04
Loss = 1.4368e-01, PNorm = 66.6160, GNorm = 1.5574, lr_0 = 6.5605e-04
Loss = 1.4392e-01, PNorm = 66.6390, GNorm = 0.7031, lr_0 = 6.5560e-04
Loss = 1.6081e-01, PNorm = 66.6573, GNorm = 1.7453, lr_0 = 6.5515e-04
Loss = 1.5671e-01, PNorm = 66.6842, GNorm = 0.6376, lr_0 = 6.5470e-04
Loss = 1.5103e-01, PNorm = 66.7124, GNorm = 1.3662, lr_0 = 6.5425e-04
Loss = 1.6229e-01, PNorm = 66.7410, GNorm = 0.7713, lr_0 = 6.5380e-04
Loss = 1.6242e-01, PNorm = 66.7686, GNorm = 0.6757, lr_0 = 6.5335e-04
Loss = 1.4356e-01, PNorm = 66.7888, GNorm = 0.8674, lr_0 = 6.5291e-04
Loss = 1.4180e-01, PNorm = 66.8049, GNorm = 1.0464, lr_0 = 6.5246e-04
Loss = 1.6269e-01, PNorm = 66.8272, GNorm = 1.1621, lr_0 = 6.5201e-04
Loss = 1.4507e-01, PNorm = 66.8509, GNorm = 0.8812, lr_0 = 6.5157e-04
Loss = 1.6588e-01, PNorm = 66.8649, GNorm = 0.7774, lr_0 = 6.5112e-04
Loss = 1.4195e-01, PNorm = 66.8849, GNorm = 0.6261, lr_0 = 6.5067e-04
Loss = 1.5521e-01, PNorm = 66.9027, GNorm = 0.6100, lr_0 = 6.5023e-04
Loss = 1.4284e-01, PNorm = 66.9251, GNorm = 0.8974, lr_0 = 6.4978e-04
Loss = 1.3980e-01, PNorm = 66.9398, GNorm = 0.6533, lr_0 = 6.4934e-04
Loss = 1.5529e-01, PNorm = 66.9590, GNorm = 1.0287, lr_0 = 6.4889e-04
Loss = 1.5594e-01, PNorm = 66.9777, GNorm = 1.1664, lr_0 = 6.4845e-04
Loss = 1.4638e-01, PNorm = 67.0023, GNorm = 0.8581, lr_0 = 6.4800e-04
Loss = 1.7181e-01, PNorm = 67.0277, GNorm = 0.5100, lr_0 = 6.4756e-04
Loss = 1.6147e-01, PNorm = 67.0554, GNorm = 1.3139, lr_0 = 6.4712e-04
Loss = 1.5098e-01, PNorm = 67.0826, GNorm = 0.7694, lr_0 = 6.4667e-04
Loss = 1.8336e-01, PNorm = 67.1000, GNorm = 1.1108, lr_0 = 6.4623e-04
Loss = 1.8711e-01, PNorm = 67.1204, GNorm = 3.2318, lr_0 = 6.4579e-04
Loss = 1.4910e-01, PNorm = 67.1432, GNorm = 0.7152, lr_0 = 6.4534e-04
Loss = 1.4520e-01, PNorm = 67.1689, GNorm = 0.9725, lr_0 = 6.4490e-04
Loss = 1.5845e-01, PNorm = 67.1907, GNorm = 1.2275, lr_0 = 6.4446e-04
Loss = 1.7490e-01, PNorm = 67.2150, GNorm = 0.9284, lr_0 = 6.4402e-04
Loss = 1.3792e-01, PNorm = 67.2388, GNorm = 0.5005, lr_0 = 6.4358e-04
Loss = 1.8529e-01, PNorm = 67.2608, GNorm = 1.2833, lr_0 = 6.4314e-04
Loss = 1.5950e-01, PNorm = 67.2841, GNorm = 0.8967, lr_0 = 6.4270e-04
Loss = 1.5067e-01, PNorm = 67.3014, GNorm = 1.1871, lr_0 = 6.4226e-04
Loss = 1.6438e-01, PNorm = 67.3185, GNorm = 0.7234, lr_0 = 6.4182e-04
Loss = 1.8341e-01, PNorm = 67.3451, GNorm = 1.4216, lr_0 = 6.4138e-04
Loss = 1.8572e-01, PNorm = 67.3702, GNorm = 1.2659, lr_0 = 6.4094e-04
Loss = 1.5110e-01, PNorm = 67.3917, GNorm = 0.6583, lr_0 = 6.4050e-04
Loss = 1.4703e-01, PNorm = 67.4128, GNorm = 0.6559, lr_0 = 6.4006e-04
Loss = 1.5828e-01, PNorm = 67.4303, GNorm = 0.6403, lr_0 = 6.3962e-04
Loss = 1.5930e-01, PNorm = 67.4489, GNorm = 0.5071, lr_0 = 6.3918e-04
Loss = 1.7266e-01, PNorm = 67.4699, GNorm = 1.2408, lr_0 = 6.3874e-04
Loss = 1.6555e-01, PNorm = 67.4967, GNorm = 0.5512, lr_0 = 6.3831e-04
Loss = 1.6794e-01, PNorm = 67.5210, GNorm = 1.1562, lr_0 = 6.3787e-04
Loss = 1.7671e-01, PNorm = 67.5419, GNorm = 0.5872, lr_0 = 6.3743e-04
Loss = 1.5349e-01, PNorm = 67.5642, GNorm = 1.2688, lr_0 = 6.3700e-04
Loss = 1.5293e-01, PNorm = 67.5891, GNorm = 0.6045, lr_0 = 6.3656e-04
Loss = 1.5248e-01, PNorm = 67.6120, GNorm = 0.8577, lr_0 = 6.3612e-04
Loss = 1.7395e-01, PNorm = 67.6270, GNorm = 0.7945, lr_0 = 6.3569e-04
Loss = 1.6455e-01, PNorm = 67.6431, GNorm = 0.7819, lr_0 = 6.3525e-04
Loss = 1.7290e-01, PNorm = 67.6612, GNorm = 0.6821, lr_0 = 6.3482e-04
Loss = 1.5440e-01, PNorm = 67.6838, GNorm = 0.5651, lr_0 = 6.3438e-04
Loss = 1.4208e-01, PNorm = 67.7067, GNorm = 0.8116, lr_0 = 6.3395e-04
Loss = 1.4391e-01, PNorm = 67.7253, GNorm = 0.7686, lr_0 = 6.3351e-04
Loss = 1.4922e-01, PNorm = 67.7509, GNorm = 1.0719, lr_0 = 6.3308e-04
Loss = 1.4056e-01, PNorm = 67.7767, GNorm = 0.7466, lr_0 = 6.3265e-04
Loss = 1.7526e-01, PNorm = 67.7978, GNorm = 1.1917, lr_0 = 6.3221e-04
Loss = 1.4224e-01, PNorm = 67.8131, GNorm = 0.6413, lr_0 = 6.3178e-04
Loss = 1.7083e-01, PNorm = 67.8276, GNorm = 0.8674, lr_0 = 6.3135e-04
Loss = 1.4944e-01, PNorm = 67.8421, GNorm = 0.8144, lr_0 = 6.3091e-04
Loss = 1.6717e-01, PNorm = 67.8555, GNorm = 0.9323, lr_0 = 6.3048e-04
Loss = 1.4747e-01, PNorm = 67.8761, GNorm = 0.6518, lr_0 = 6.3005e-04
Loss = 1.4624e-01, PNorm = 67.8991, GNorm = 0.6926, lr_0 = 6.2962e-04
Loss = 1.6039e-01, PNorm = 67.9215, GNorm = 0.9667, lr_0 = 6.2919e-04
Loss = 1.7946e-01, PNorm = 67.9426, GNorm = 0.9240, lr_0 = 6.2876e-04
Loss = 1.5308e-01, PNorm = 67.9673, GNorm = 0.7874, lr_0 = 6.2833e-04
Loss = 1.5129e-01, PNorm = 67.9868, GNorm = 0.9189, lr_0 = 6.2789e-04
Loss = 1.4095e-01, PNorm = 68.0036, GNorm = 1.5515, lr_0 = 6.2746e-04
Loss = 1.4483e-01, PNorm = 68.0163, GNorm = 0.7275, lr_0 = 6.2703e-04
Loss = 1.6498e-01, PNorm = 68.0345, GNorm = 1.0040, lr_0 = 6.2661e-04
Loss = 1.4308e-01, PNorm = 68.0551, GNorm = 0.9048, lr_0 = 6.2618e-04
Loss = 1.5486e-01, PNorm = 68.0721, GNorm = 1.5202, lr_0 = 6.2575e-04
Loss = 1.5081e-01, PNorm = 68.0914, GNorm = 0.7529, lr_0 = 6.2532e-04
Loss = 1.4711e-01, PNorm = 68.1112, GNorm = 0.7999, lr_0 = 6.2489e-04
Loss = 1.4398e-01, PNorm = 68.1283, GNorm = 0.8279, lr_0 = 6.2446e-04
Loss = 1.6165e-01, PNorm = 68.1432, GNorm = 0.6527, lr_0 = 6.2403e-04
Loss = 1.5298e-01, PNorm = 68.1600, GNorm = 1.1868, lr_0 = 6.2361e-04
Loss = 1.5519e-01, PNorm = 68.1807, GNorm = 1.1464, lr_0 = 6.2318e-04
Loss = 1.6733e-01, PNorm = 68.1966, GNorm = 0.9420, lr_0 = 6.2275e-04
Loss = 1.5469e-01, PNorm = 68.2204, GNorm = 0.6774, lr_0 = 6.2233e-04
Loss = 1.5869e-01, PNorm = 68.2396, GNorm = 0.8005, lr_0 = 6.2190e-04
Loss = 1.5467e-01, PNorm = 68.2710, GNorm = 1.2939, lr_0 = 6.2147e-04
Loss = 1.4404e-01, PNorm = 68.2956, GNorm = 0.6011, lr_0 = 6.2105e-04
Loss = 1.6829e-01, PNorm = 68.3122, GNorm = 0.9139, lr_0 = 6.2062e-04
Loss = 1.3521e-01, PNorm = 68.3358, GNorm = 0.5925, lr_0 = 6.2020e-04
Loss = 1.6377e-01, PNorm = 68.3572, GNorm = 0.6213, lr_0 = 6.1977e-04
Loss = 1.5860e-01, PNorm = 68.3797, GNorm = 1.0085, lr_0 = 6.1935e-04
Loss = 1.5882e-01, PNorm = 68.3989, GNorm = 0.7818, lr_0 = 6.1892e-04
Loss = 1.4206e-01, PNorm = 68.4145, GNorm = 0.5972, lr_0 = 6.1850e-04
Loss = 1.4976e-01, PNorm = 68.4261, GNorm = 0.8145, lr_0 = 6.1808e-04
Loss = 1.4503e-01, PNorm = 68.4440, GNorm = 0.5633, lr_0 = 6.1765e-04
Loss = 1.4627e-01, PNorm = 68.4568, GNorm = 0.7089, lr_0 = 6.1723e-04
Loss = 1.5578e-01, PNorm = 68.4760, GNorm = 0.7412, lr_0 = 6.1681e-04
Loss = 1.4517e-01, PNorm = 68.4903, GNorm = 0.6942, lr_0 = 6.1638e-04
Loss = 1.8464e-01, PNorm = 68.5052, GNorm = 1.0232, lr_0 = 6.1596e-04
Loss = 1.4922e-01, PNorm = 68.5191, GNorm = 1.6484, lr_0 = 6.1554e-04
Loss = 1.6127e-01, PNorm = 68.5375, GNorm = 0.9059, lr_0 = 6.1512e-04
Loss = 1.7182e-01, PNorm = 68.5580, GNorm = 1.3749, lr_0 = 6.1470e-04
Loss = 1.7342e-01, PNorm = 68.5829, GNorm = 0.8140, lr_0 = 6.1428e-04
Loss = 1.5729e-01, PNorm = 68.6030, GNorm = 1.4408, lr_0 = 6.1385e-04
Loss = 1.5637e-01, PNorm = 68.6235, GNorm = 0.6735, lr_0 = 6.1343e-04
Loss = 1.5381e-01, PNorm = 68.6464, GNorm = 0.7391, lr_0 = 6.1301e-04
Loss = 1.5691e-01, PNorm = 68.6639, GNorm = 0.6388, lr_0 = 6.1259e-04
Loss = 1.5589e-01, PNorm = 68.6898, GNorm = 0.8025, lr_0 = 6.1217e-04
Loss = 1.4489e-01, PNorm = 68.7042, GNorm = 0.7444, lr_0 = 6.1175e-04
Loss = 1.3969e-01, PNorm = 68.7230, GNorm = 0.6303, lr_0 = 6.1134e-04
Loss = 1.3820e-01, PNorm = 68.7411, GNorm = 1.2464, lr_0 = 6.1092e-04
Loss = 1.5818e-01, PNorm = 68.7635, GNorm = 0.9426, lr_0 = 6.1050e-04
Validation mae = 0.248727
Epoch 8
Loss = 1.4583e-01, PNorm = 68.7798, GNorm = 0.8530, lr_0 = 6.1008e-04
Loss = 1.5551e-01, PNorm = 68.8010, GNorm = 0.7970, lr_0 = 6.0966e-04
Loss = 1.3313e-01, PNorm = 68.8180, GNorm = 0.6772, lr_0 = 6.0924e-04
Loss = 1.2390e-01, PNorm = 68.8382, GNorm = 0.6332, lr_0 = 6.0883e-04
Loss = 1.5409e-01, PNorm = 68.8556, GNorm = 0.6977, lr_0 = 6.0841e-04
Loss = 1.5681e-01, PNorm = 68.8764, GNorm = 1.0822, lr_0 = 6.0799e-04
Loss = 1.2946e-01, PNorm = 68.8911, GNorm = 0.6172, lr_0 = 6.0758e-04
Loss = 1.4111e-01, PNorm = 68.9040, GNorm = 0.6885, lr_0 = 6.0716e-04
Loss = 1.3190e-01, PNorm = 68.9164, GNorm = 0.6913, lr_0 = 6.0674e-04
Loss = 1.5019e-01, PNorm = 68.9365, GNorm = 0.7623, lr_0 = 6.0633e-04
Loss = 1.4296e-01, PNorm = 68.9570, GNorm = 0.6199, lr_0 = 6.0591e-04
Loss = 1.3593e-01, PNorm = 68.9763, GNorm = 1.2577, lr_0 = 6.0550e-04
Loss = 1.3641e-01, PNorm = 68.9966, GNorm = 1.9342, lr_0 = 6.0508e-04
Loss = 1.4265e-01, PNorm = 69.0125, GNorm = 0.7079, lr_0 = 6.0467e-04
Loss = 1.5654e-01, PNorm = 69.0350, GNorm = 1.7564, lr_0 = 6.0425e-04
Loss = 1.5041e-01, PNorm = 69.0529, GNorm = 0.5205, lr_0 = 6.0384e-04
Loss = 1.4884e-01, PNorm = 69.0714, GNorm = 0.9073, lr_0 = 6.0343e-04
Loss = 1.4133e-01, PNorm = 69.0928, GNorm = 0.5783, lr_0 = 6.0301e-04
Loss = 1.4625e-01, PNorm = 69.1164, GNorm = 0.9248, lr_0 = 6.0260e-04
Loss = 1.3657e-01, PNorm = 69.1365, GNorm = 0.6224, lr_0 = 6.0219e-04
Loss = 1.4857e-01, PNorm = 69.1564, GNorm = 0.8593, lr_0 = 6.0178e-04
Loss = 1.5917e-01, PNorm = 69.1774, GNorm = 0.7627, lr_0 = 6.0136e-04
Loss = 1.3700e-01, PNorm = 69.1993, GNorm = 0.6272, lr_0 = 6.0095e-04
Loss = 1.4504e-01, PNorm = 69.2191, GNorm = 0.7085, lr_0 = 6.0054e-04
Loss = 1.3493e-01, PNorm = 69.2334, GNorm = 0.6161, lr_0 = 6.0013e-04
Loss = 1.4325e-01, PNorm = 69.2503, GNorm = 0.9428, lr_0 = 5.9972e-04
Loss = 1.3074e-01, PNorm = 69.2699, GNorm = 0.8271, lr_0 = 5.9931e-04
Loss = 1.5259e-01, PNorm = 69.2881, GNorm = 0.7421, lr_0 = 5.9890e-04
Loss = 1.6938e-01, PNorm = 69.3011, GNorm = 0.8803, lr_0 = 5.9849e-04
Loss = 1.4330e-01, PNorm = 69.3231, GNorm = 0.6045, lr_0 = 5.9808e-04
Loss = 1.4673e-01, PNorm = 69.3441, GNorm = 0.8272, lr_0 = 5.9767e-04
Loss = 1.6458e-01, PNorm = 69.3615, GNorm = 1.1709, lr_0 = 5.9726e-04
Loss = 1.5677e-01, PNorm = 69.3838, GNorm = 0.8505, lr_0 = 5.9685e-04
Loss = 1.4709e-01, PNorm = 69.4025, GNorm = 0.6346, lr_0 = 5.9644e-04
Loss = 1.6361e-01, PNorm = 69.4320, GNorm = 1.1640, lr_0 = 5.9603e-04
Loss = 1.5757e-01, PNorm = 69.4548, GNorm = 0.8459, lr_0 = 5.9562e-04
Loss = 1.3583e-01, PNorm = 69.4756, GNorm = 0.6720, lr_0 = 5.9521e-04
Loss = 1.3163e-01, PNorm = 69.4893, GNorm = 1.1658, lr_0 = 5.9481e-04
Loss = 1.3798e-01, PNorm = 69.5055, GNorm = 0.5541, lr_0 = 5.9440e-04
Loss = 1.5629e-01, PNorm = 69.5237, GNorm = 1.5983, lr_0 = 5.9399e-04
Loss = 1.4046e-01, PNorm = 69.5467, GNorm = 0.5352, lr_0 = 5.9358e-04
Loss = 1.7437e-01, PNorm = 69.5687, GNorm = 0.8474, lr_0 = 5.9318e-04
Loss = 1.4776e-01, PNorm = 69.6001, GNorm = 0.6228, lr_0 = 5.9277e-04
Loss = 1.2976e-01, PNorm = 69.6178, GNorm = 0.6586, lr_0 = 5.9236e-04
Loss = 1.2808e-01, PNorm = 69.6351, GNorm = 0.9626, lr_0 = 5.9196e-04
Loss = 1.5192e-01, PNorm = 69.6466, GNorm = 0.6671, lr_0 = 5.9155e-04
Loss = 1.4272e-01, PNorm = 69.6645, GNorm = 0.8184, lr_0 = 5.9115e-04
Loss = 1.6512e-01, PNorm = 69.6829, GNorm = 1.0890, lr_0 = 5.9074e-04
Loss = 1.4236e-01, PNorm = 69.7035, GNorm = 0.8787, lr_0 = 5.9034e-04
Loss = 1.3013e-01, PNorm = 69.7234, GNorm = 0.6288, lr_0 = 5.8993e-04
Loss = 1.6421e-01, PNorm = 69.7372, GNorm = 1.0438, lr_0 = 5.8953e-04
Loss = 1.5565e-01, PNorm = 69.7542, GNorm = 0.7385, lr_0 = 5.8913e-04
Loss = 1.5125e-01, PNorm = 69.7749, GNorm = 0.6332, lr_0 = 5.8872e-04
Loss = 1.4779e-01, PNorm = 69.7970, GNorm = 0.7020, lr_0 = 5.8832e-04
Loss = 1.4593e-01, PNorm = 69.8139, GNorm = 0.5028, lr_0 = 5.8792e-04
Loss = 1.3454e-01, PNorm = 69.8322, GNorm = 0.7244, lr_0 = 5.8751e-04
Loss = 1.4585e-01, PNorm = 69.8528, GNorm = 0.6907, lr_0 = 5.8711e-04
Loss = 1.4972e-01, PNorm = 69.8741, GNorm = 0.9246, lr_0 = 5.8671e-04
Loss = 1.4969e-01, PNorm = 69.9002, GNorm = 0.6292, lr_0 = 5.8631e-04
Loss = 1.3664e-01, PNorm = 69.9265, GNorm = 0.7937, lr_0 = 5.8591e-04
Loss = 1.5710e-01, PNorm = 69.9481, GNorm = 0.6682, lr_0 = 5.8550e-04
Loss = 1.5449e-01, PNorm = 69.9615, GNorm = 0.5030, lr_0 = 5.8510e-04
Loss = 1.4438e-01, PNorm = 69.9765, GNorm = 0.9215, lr_0 = 5.8470e-04
Loss = 1.3233e-01, PNorm = 69.9901, GNorm = 0.8973, lr_0 = 5.8430e-04
Loss = 1.3252e-01, PNorm = 70.0102, GNorm = 0.5514, lr_0 = 5.8390e-04
Loss = 1.4757e-01, PNorm = 70.0324, GNorm = 0.7450, lr_0 = 5.8350e-04
Loss = 1.3984e-01, PNorm = 70.0520, GNorm = 0.5708, lr_0 = 5.8310e-04
Loss = 1.2923e-01, PNorm = 70.0694, GNorm = 0.9376, lr_0 = 5.8270e-04
Loss = 1.4540e-01, PNorm = 70.0803, GNorm = 1.3777, lr_0 = 5.8230e-04
Loss = 1.2826e-01, PNorm = 70.0940, GNorm = 0.6303, lr_0 = 5.8190e-04
Loss = 1.3343e-01, PNorm = 70.1049, GNorm = 0.8271, lr_0 = 5.8151e-04
Loss = 1.4757e-01, PNorm = 70.1183, GNorm = 0.6563, lr_0 = 5.8111e-04
Loss = 1.4284e-01, PNorm = 70.1316, GNorm = 0.5820, lr_0 = 5.8071e-04
Loss = 1.4407e-01, PNorm = 70.1478, GNorm = 0.8308, lr_0 = 5.8031e-04
Loss = 1.4108e-01, PNorm = 70.1715, GNorm = 0.6078, lr_0 = 5.7991e-04
Loss = 1.4275e-01, PNorm = 70.1883, GNorm = 1.0413, lr_0 = 5.7952e-04
Loss = 1.5359e-01, PNorm = 70.2084, GNorm = 1.5430, lr_0 = 5.7912e-04
Loss = 1.5248e-01, PNorm = 70.2239, GNorm = 0.7466, lr_0 = 5.7872e-04
Loss = 1.5466e-01, PNorm = 70.2473, GNorm = 0.8322, lr_0 = 5.7833e-04
Loss = 1.6622e-01, PNorm = 70.2653, GNorm = 0.7290, lr_0 = 5.7793e-04
Loss = 1.5205e-01, PNorm = 70.2875, GNorm = 0.6540, lr_0 = 5.7753e-04
Loss = 1.5611e-01, PNorm = 70.3091, GNorm = 0.9008, lr_0 = 5.7714e-04
Loss = 1.4352e-01, PNorm = 70.3291, GNorm = 0.6602, lr_0 = 5.7674e-04
Loss = 1.4183e-01, PNorm = 70.3445, GNorm = 0.6615, lr_0 = 5.7635e-04
Loss = 1.5003e-01, PNorm = 70.3574, GNorm = 0.5371, lr_0 = 5.7595e-04
Loss = 1.4400e-01, PNorm = 70.3783, GNorm = 0.8444, lr_0 = 5.7556e-04
Loss = 1.4519e-01, PNorm = 70.3993, GNorm = 0.7053, lr_0 = 5.7516e-04
Loss = 1.4786e-01, PNorm = 70.4136, GNorm = 0.7153, lr_0 = 5.7477e-04
Loss = 1.5381e-01, PNorm = 70.4317, GNorm = 0.5842, lr_0 = 5.7438e-04
Loss = 1.6859e-01, PNorm = 70.4462, GNorm = 0.6443, lr_0 = 5.7398e-04
Loss = 1.4917e-01, PNorm = 70.4631, GNorm = 0.8601, lr_0 = 5.7359e-04
Loss = 1.4797e-01, PNorm = 70.4808, GNorm = 0.5882, lr_0 = 5.7320e-04
Loss = 1.5764e-01, PNorm = 70.5057, GNorm = 0.9370, lr_0 = 5.7280e-04
Loss = 1.6156e-01, PNorm = 70.5220, GNorm = 0.7073, lr_0 = 5.7241e-04
Loss = 1.3539e-01, PNorm = 70.5391, GNorm = 0.5342, lr_0 = 5.7202e-04
Loss = 1.4286e-01, PNorm = 70.5498, GNorm = 0.5971, lr_0 = 5.7163e-04
Loss = 1.4154e-01, PNorm = 70.5672, GNorm = 0.6646, lr_0 = 5.7124e-04
Loss = 1.6052e-01, PNorm = 70.5823, GNorm = 0.8812, lr_0 = 5.7084e-04
Loss = 1.5635e-01, PNorm = 70.6011, GNorm = 0.7952, lr_0 = 5.7045e-04
Loss = 1.5166e-01, PNorm = 70.6233, GNorm = 1.1044, lr_0 = 5.7006e-04
Loss = 1.5342e-01, PNorm = 70.6476, GNorm = 1.3221, lr_0 = 5.6967e-04
Loss = 1.6259e-01, PNorm = 70.6695, GNorm = 0.9487, lr_0 = 5.6928e-04
Loss = 1.4929e-01, PNorm = 70.6950, GNorm = 0.7935, lr_0 = 5.6889e-04
Loss = 1.6496e-01, PNorm = 70.7100, GNorm = 0.8217, lr_0 = 5.6850e-04
Loss = 1.5964e-01, PNorm = 70.7290, GNorm = 1.4871, lr_0 = 5.6811e-04
Loss = 1.4004e-01, PNorm = 70.7417, GNorm = 0.7154, lr_0 = 5.6772e-04
Loss = 1.5690e-01, PNorm = 70.7677, GNorm = 1.0259, lr_0 = 5.6733e-04
Loss = 1.4222e-01, PNorm = 70.7880, GNorm = 0.6585, lr_0 = 5.6695e-04
Loss = 1.4935e-01, PNorm = 70.8075, GNorm = 1.1056, lr_0 = 5.6656e-04
Loss = 1.3720e-01, PNorm = 70.8234, GNorm = 1.3660, lr_0 = 5.6617e-04
Loss = 1.6962e-01, PNorm = 70.8371, GNorm = 1.3476, lr_0 = 5.6578e-04
Loss = 1.5015e-01, PNorm = 70.8613, GNorm = 1.3679, lr_0 = 5.6539e-04
Loss = 1.5789e-01, PNorm = 70.8788, GNorm = 1.9565, lr_0 = 5.6501e-04
Loss = 1.6122e-01, PNorm = 70.8956, GNorm = 1.1968, lr_0 = 5.6462e-04
Loss = 1.5529e-01, PNorm = 70.9098, GNorm = 0.8190, lr_0 = 5.6423e-04
Loss = 1.3965e-01, PNorm = 70.9232, GNorm = 0.6982, lr_0 = 5.6385e-04
Loss = 1.5455e-01, PNorm = 70.9343, GNorm = 0.6129, lr_0 = 5.6346e-04
Loss = 1.4428e-01, PNorm = 70.9439, GNorm = 0.9319, lr_0 = 5.6307e-04
Loss = 1.4515e-01, PNorm = 70.9564, GNorm = 0.8691, lr_0 = 5.6269e-04
Loss = 1.8025e-01, PNorm = 70.9803, GNorm = 1.6694, lr_0 = 5.6230e-04
Validation mae = 0.239915
Epoch 9
Loss = 1.5576e-01, PNorm = 71.0072, GNorm = 1.6005, lr_0 = 5.6192e-04
Loss = 1.4440e-01, PNorm = 71.0327, GNorm = 0.6930, lr_0 = 5.6153e-04
Loss = 1.5024e-01, PNorm = 71.0548, GNorm = 0.8090, lr_0 = 5.6115e-04
Loss = 1.3385e-01, PNorm = 71.0730, GNorm = 0.9718, lr_0 = 5.6076e-04
Loss = 1.2703e-01, PNorm = 71.0841, GNorm = 0.7872, lr_0 = 5.6038e-04
Loss = 1.4806e-01, PNorm = 71.0999, GNorm = 0.6912, lr_0 = 5.6000e-04
Loss = 1.3158e-01, PNorm = 71.1100, GNorm = 1.0369, lr_0 = 5.5961e-04
Loss = 1.3206e-01, PNorm = 71.1289, GNorm = 0.5729, lr_0 = 5.5923e-04
Loss = 1.3163e-01, PNorm = 71.1458, GNorm = 0.6211, lr_0 = 5.5885e-04
Loss = 1.4351e-01, PNorm = 71.1654, GNorm = 0.7454, lr_0 = 5.5846e-04
Loss = 1.6543e-01, PNorm = 71.1867, GNorm = 1.0701, lr_0 = 5.5808e-04
Loss = 1.4046e-01, PNorm = 71.2191, GNorm = 0.8536, lr_0 = 5.5770e-04
Loss = 1.4934e-01, PNorm = 71.2423, GNorm = 0.7232, lr_0 = 5.5732e-04
Loss = 1.2198e-01, PNorm = 71.2620, GNorm = 1.1659, lr_0 = 5.5693e-04
Loss = 1.5700e-01, PNorm = 71.2828, GNorm = 1.0604, lr_0 = 5.5655e-04
Loss = 1.4542e-01, PNorm = 71.3027, GNorm = 0.6587, lr_0 = 5.5617e-04
Loss = 1.6000e-01, PNorm = 71.3126, GNorm = 0.6338, lr_0 = 5.5579e-04
Loss = 1.6039e-01, PNorm = 71.3262, GNorm = 1.1070, lr_0 = 5.5541e-04
Loss = 1.6213e-01, PNorm = 71.3507, GNorm = 0.9058, lr_0 = 5.5503e-04
Loss = 1.3711e-01, PNorm = 71.3706, GNorm = 0.5862, lr_0 = 5.5465e-04
Loss = 1.4401e-01, PNorm = 71.3871, GNorm = 0.9686, lr_0 = 5.5427e-04
Loss = 1.5960e-01, PNorm = 71.3949, GNorm = 0.6984, lr_0 = 5.5389e-04
Loss = 1.4204e-01, PNorm = 71.4180, GNorm = 0.8493, lr_0 = 5.5351e-04
Loss = 1.3856e-01, PNorm = 71.4386, GNorm = 1.5169, lr_0 = 5.5313e-04
Loss = 1.5256e-01, PNorm = 71.4622, GNorm = 0.6293, lr_0 = 5.5275e-04
Loss = 1.4214e-01, PNorm = 71.4822, GNorm = 0.8161, lr_0 = 5.5237e-04
Loss = 1.3494e-01, PNorm = 71.5000, GNorm = 1.0547, lr_0 = 5.5199e-04
Loss = 1.3648e-01, PNorm = 71.5163, GNorm = 0.7025, lr_0 = 5.5162e-04
Loss = 1.3313e-01, PNorm = 71.5354, GNorm = 0.9514, lr_0 = 5.5124e-04
Loss = 1.3703e-01, PNorm = 71.5567, GNorm = 0.6715, lr_0 = 5.5086e-04
Loss = 1.4963e-01, PNorm = 71.5751, GNorm = 0.6491, lr_0 = 5.5048e-04
Loss = 1.2722e-01, PNorm = 71.5921, GNorm = 0.5246, lr_0 = 5.5011e-04
Loss = 1.3783e-01, PNorm = 71.6143, GNorm = 0.7311, lr_0 = 5.4973e-04
Loss = 1.5897e-01, PNorm = 71.6310, GNorm = 0.7091, lr_0 = 5.4935e-04
Loss = 1.4155e-01, PNorm = 71.6518, GNorm = 0.9716, lr_0 = 5.4898e-04
Loss = 1.4018e-01, PNorm = 71.6696, GNorm = 0.7044, lr_0 = 5.4860e-04
Loss = 1.6830e-01, PNorm = 71.6937, GNorm = 0.8077, lr_0 = 5.4822e-04
Loss = 1.5456e-01, PNorm = 71.6979, GNorm = 0.6631, lr_0 = 5.4785e-04
Loss = 1.4307e-01, PNorm = 71.7119, GNorm = 1.1326, lr_0 = 5.4747e-04
Loss = 1.5891e-01, PNorm = 71.7280, GNorm = 0.5557, lr_0 = 5.4710e-04
Loss = 1.5494e-01, PNorm = 71.7425, GNorm = 0.7761, lr_0 = 5.4672e-04
Loss = 1.6334e-01, PNorm = 71.7636, GNorm = 0.9426, lr_0 = 5.4635e-04
Loss = 1.5124e-01, PNorm = 71.7827, GNorm = 0.8344, lr_0 = 5.4597e-04
Loss = 1.5111e-01, PNorm = 71.8066, GNorm = 0.9178, lr_0 = 5.4560e-04
Loss = 1.2374e-01, PNorm = 71.8219, GNorm = 0.6324, lr_0 = 5.4523e-04
Loss = 1.2962e-01, PNorm = 71.8394, GNorm = 0.6423, lr_0 = 5.4485e-04
Loss = 1.3710e-01, PNorm = 71.8523, GNorm = 0.5549, lr_0 = 5.4448e-04
Loss = 1.1392e-01, PNorm = 71.8666, GNorm = 0.5212, lr_0 = 5.4411e-04
Loss = 1.2399e-01, PNorm = 71.8740, GNorm = 1.1718, lr_0 = 5.4373e-04
Loss = 1.3817e-01, PNorm = 71.8882, GNorm = 0.7531, lr_0 = 5.4336e-04
Loss = 1.4216e-01, PNorm = 71.9001, GNorm = 0.8284, lr_0 = 5.4299e-04
Loss = 1.4258e-01, PNorm = 71.9191, GNorm = 0.6603, lr_0 = 5.4262e-04
Loss = 1.5556e-01, PNorm = 71.9386, GNorm = 0.6148, lr_0 = 5.4225e-04
Loss = 1.2103e-01, PNorm = 71.9615, GNorm = 0.5817, lr_0 = 5.4187e-04
Loss = 1.6106e-01, PNorm = 71.9728, GNorm = 1.3461, lr_0 = 5.4150e-04
Loss = 1.2969e-01, PNorm = 71.9911, GNorm = 1.0440, lr_0 = 5.4113e-04
Loss = 1.5027e-01, PNorm = 72.0096, GNorm = 1.2009, lr_0 = 5.4076e-04
Loss = 1.5160e-01, PNorm = 72.0349, GNorm = 0.6280, lr_0 = 5.4039e-04
Loss = 1.4871e-01, PNorm = 72.0516, GNorm = 0.7427, lr_0 = 5.4002e-04
Loss = 1.5343e-01, PNorm = 72.0715, GNorm = 0.7804, lr_0 = 5.3965e-04
Loss = 1.3820e-01, PNorm = 72.0973, GNorm = 1.3416, lr_0 = 5.3928e-04
Loss = 1.2917e-01, PNorm = 72.1135, GNorm = 0.6479, lr_0 = 5.3891e-04
Loss = 1.4470e-01, PNorm = 72.1256, GNorm = 0.6648, lr_0 = 5.3854e-04
Loss = 1.5566e-01, PNorm = 72.1337, GNorm = 1.2510, lr_0 = 5.3817e-04
Loss = 1.4584e-01, PNorm = 72.1547, GNorm = 0.6802, lr_0 = 5.3781e-04
Loss = 1.3354e-01, PNorm = 72.1745, GNorm = 0.6760, lr_0 = 5.3744e-04
Loss = 1.3494e-01, PNorm = 72.1915, GNorm = 0.7763, lr_0 = 5.3707e-04
Loss = 1.3599e-01, PNorm = 72.2145, GNorm = 0.7748, lr_0 = 5.3670e-04
Loss = 1.6188e-01, PNorm = 72.2324, GNorm = 0.5771, lr_0 = 5.3633e-04
Loss = 1.6988e-01, PNorm = 72.2545, GNorm = 0.9166, lr_0 = 5.3597e-04
Loss = 1.4349e-01, PNorm = 72.2705, GNorm = 0.6875, lr_0 = 5.3560e-04
Loss = 1.2515e-01, PNorm = 72.2889, GNorm = 0.8182, lr_0 = 5.3523e-04
Loss = 1.4075e-01, PNorm = 72.3089, GNorm = 0.8419, lr_0 = 5.3486e-04
Loss = 1.5266e-01, PNorm = 72.3247, GNorm = 1.0328, lr_0 = 5.3450e-04
Loss = 1.3230e-01, PNorm = 72.3391, GNorm = 0.6938, lr_0 = 5.3413e-04
Loss = 1.3045e-01, PNorm = 72.3503, GNorm = 1.0124, lr_0 = 5.3377e-04
Loss = 1.3146e-01, PNorm = 72.3664, GNorm = 0.8718, lr_0 = 5.3340e-04
Loss = 1.4315e-01, PNorm = 72.3853, GNorm = 0.9705, lr_0 = 5.3304e-04
Loss = 1.6324e-01, PNorm = 72.4058, GNorm = 1.4574, lr_0 = 5.3267e-04
Loss = 1.2167e-01, PNorm = 72.4200, GNorm = 0.7497, lr_0 = 5.3231e-04
Loss = 1.3293e-01, PNorm = 72.4287, GNorm = 0.7504, lr_0 = 5.3194e-04
Loss = 1.3660e-01, PNorm = 72.4412, GNorm = 0.7743, lr_0 = 5.3158e-04
Loss = 1.6888e-01, PNorm = 72.4571, GNorm = 0.4488, lr_0 = 5.3121e-04
Loss = 1.4741e-01, PNorm = 72.4710, GNorm = 0.9022, lr_0 = 5.3085e-04
Loss = 1.3882e-01, PNorm = 72.4812, GNorm = 0.5235, lr_0 = 5.3048e-04
Loss = 1.4668e-01, PNorm = 72.5002, GNorm = 0.5752, lr_0 = 5.3012e-04
Loss = 1.4716e-01, PNorm = 72.5199, GNorm = 0.4766, lr_0 = 5.2976e-04
Loss = 1.2452e-01, PNorm = 72.5372, GNorm = 0.7314, lr_0 = 5.2939e-04
Loss = 1.3984e-01, PNorm = 72.5519, GNorm = 1.9258, lr_0 = 5.2903e-04
Loss = 1.5047e-01, PNorm = 72.5697, GNorm = 0.8048, lr_0 = 5.2867e-04
Loss = 1.5538e-01, PNorm = 72.5863, GNorm = 0.6158, lr_0 = 5.2831e-04
Loss = 1.5148e-01, PNorm = 72.6095, GNorm = 0.9199, lr_0 = 5.2795e-04
Loss = 1.3522e-01, PNorm = 72.6263, GNorm = 0.6014, lr_0 = 5.2758e-04
Loss = 1.4734e-01, PNorm = 72.6435, GNorm = 0.8515, lr_0 = 5.2722e-04
Loss = 1.4653e-01, PNorm = 72.6538, GNorm = 0.7349, lr_0 = 5.2686e-04
Loss = 1.1544e-01, PNorm = 72.6677, GNorm = 0.7041, lr_0 = 5.2650e-04
Loss = 1.5226e-01, PNorm = 72.6778, GNorm = 0.7474, lr_0 = 5.2614e-04
Loss = 1.3133e-01, PNorm = 72.6888, GNorm = 0.8646, lr_0 = 5.2578e-04
Loss = 1.3314e-01, PNorm = 72.6949, GNorm = 0.9079, lr_0 = 5.2542e-04
Loss = 1.3126e-01, PNorm = 72.7072, GNorm = 0.7339, lr_0 = 5.2506e-04
Loss = 1.5293e-01, PNorm = 72.7252, GNorm = 0.8324, lr_0 = 5.2470e-04
Loss = 1.3804e-01, PNorm = 72.7447, GNorm = 0.5526, lr_0 = 5.2434e-04
Loss = 1.2332e-01, PNorm = 72.7627, GNorm = 0.5482, lr_0 = 5.2398e-04
Loss = 1.2762e-01, PNorm = 72.7785, GNorm = 0.6079, lr_0 = 5.2362e-04
Loss = 1.3421e-01, PNorm = 72.7917, GNorm = 0.7091, lr_0 = 5.2326e-04
Loss = 1.3795e-01, PNorm = 72.8003, GNorm = 0.5703, lr_0 = 5.2290e-04
Loss = 1.5874e-01, PNorm = 72.8075, GNorm = 0.7072, lr_0 = 5.2255e-04
Loss = 1.3498e-01, PNorm = 72.8227, GNorm = 0.7330, lr_0 = 5.2219e-04
Loss = 1.4530e-01, PNorm = 72.8318, GNorm = 0.8365, lr_0 = 5.2183e-04
Loss = 1.3088e-01, PNorm = 72.8447, GNorm = 0.7186, lr_0 = 5.2147e-04
Loss = 1.2541e-01, PNorm = 72.8554, GNorm = 0.6547, lr_0 = 5.2112e-04
Loss = 1.4433e-01, PNorm = 72.8674, GNorm = 0.9898, lr_0 = 5.2076e-04
Loss = 1.3108e-01, PNorm = 72.8807, GNorm = 0.6108, lr_0 = 5.2040e-04
Loss = 1.3362e-01, PNorm = 72.8977, GNorm = 0.8233, lr_0 = 5.2005e-04
Loss = 1.3545e-01, PNorm = 72.9099, GNorm = 1.2315, lr_0 = 5.1969e-04
Loss = 1.3972e-01, PNorm = 72.9268, GNorm = 0.6012, lr_0 = 5.1933e-04
Loss = 1.4277e-01, PNorm = 72.9431, GNorm = 0.5824, lr_0 = 5.1898e-04
Loss = 1.4299e-01, PNorm = 72.9594, GNorm = 0.8125, lr_0 = 5.1862e-04
Loss = 1.2815e-01, PNorm = 72.9741, GNorm = 0.6015, lr_0 = 5.1827e-04
Loss = 1.4472e-01, PNorm = 72.9904, GNorm = 0.5171, lr_0 = 5.1791e-04
Validation mae = 0.238801
Epoch 10
Loss = 1.2539e-01, PNorm = 73.0042, GNorm = 0.3882, lr_0 = 5.1756e-04
Loss = 1.1831e-01, PNorm = 73.0172, GNorm = 0.5551, lr_0 = 5.1720e-04
Loss = 1.1481e-01, PNorm = 73.0342, GNorm = 0.8476, lr_0 = 5.1685e-04
Loss = 1.2656e-01, PNorm = 73.0432, GNorm = 0.6111, lr_0 = 5.1649e-04
Loss = 1.3096e-01, PNorm = 73.0519, GNorm = 0.6166, lr_0 = 5.1614e-04
Loss = 1.3111e-01, PNorm = 73.0717, GNorm = 0.9168, lr_0 = 5.1579e-04
Loss = 1.3369e-01, PNorm = 73.0936, GNorm = 0.9970, lr_0 = 5.1543e-04
Loss = 1.2942e-01, PNorm = 73.1101, GNorm = 1.3011, lr_0 = 5.1508e-04
Loss = 1.3269e-01, PNorm = 73.1350, GNorm = 0.8486, lr_0 = 5.1473e-04
Loss = 1.4497e-01, PNorm = 73.1534, GNorm = 1.0452, lr_0 = 5.1437e-04
Loss = 1.2508e-01, PNorm = 73.1752, GNorm = 0.8434, lr_0 = 5.1402e-04
Loss = 1.3347e-01, PNorm = 73.1883, GNorm = 0.7948, lr_0 = 5.1367e-04
Loss = 1.2650e-01, PNorm = 73.2019, GNorm = 1.0159, lr_0 = 5.1332e-04
Loss = 1.1784e-01, PNorm = 73.2109, GNorm = 0.6462, lr_0 = 5.1297e-04
Loss = 1.2017e-01, PNorm = 73.2262, GNorm = 0.8970, lr_0 = 5.1262e-04
Loss = 1.2802e-01, PNorm = 73.2467, GNorm = 0.6628, lr_0 = 5.1226e-04
Loss = 1.3824e-01, PNorm = 73.2637, GNorm = 1.4015, lr_0 = 5.1191e-04
Loss = 1.3474e-01, PNorm = 73.2810, GNorm = 0.9745, lr_0 = 5.1156e-04
Loss = 1.3455e-01, PNorm = 73.2941, GNorm = 0.6605, lr_0 = 5.1121e-04
Loss = 1.1385e-01, PNorm = 73.3039, GNorm = 0.5074, lr_0 = 5.1086e-04
Loss = 1.3434e-01, PNorm = 73.3143, GNorm = 0.6597, lr_0 = 5.1051e-04
Loss = 1.2628e-01, PNorm = 73.3251, GNorm = 0.6180, lr_0 = 5.1016e-04
Loss = 1.4261e-01, PNorm = 73.3329, GNorm = 0.8390, lr_0 = 5.0981e-04
Loss = 1.1848e-01, PNorm = 73.3491, GNorm = 0.6677, lr_0 = 5.0946e-04
Loss = 1.4319e-01, PNorm = 73.3542, GNorm = 0.9262, lr_0 = 5.0911e-04
Loss = 1.4171e-01, PNorm = 73.3740, GNorm = 0.8921, lr_0 = 5.0877e-04
Loss = 1.4237e-01, PNorm = 73.3937, GNorm = 0.5482, lr_0 = 5.0842e-04
Loss = 1.3243e-01, PNorm = 73.4152, GNorm = 0.8753, lr_0 = 5.0807e-04
Loss = 1.3362e-01, PNorm = 73.4291, GNorm = 0.6857, lr_0 = 5.0772e-04
Loss = 1.4691e-01, PNorm = 73.4440, GNorm = 0.5806, lr_0 = 5.0737e-04
Loss = 1.3540e-01, PNorm = 73.4579, GNorm = 0.6520, lr_0 = 5.0703e-04
Loss = 1.3918e-01, PNorm = 73.4741, GNorm = 0.4846, lr_0 = 5.0668e-04
Loss = 1.3280e-01, PNorm = 73.4923, GNorm = 0.8326, lr_0 = 5.0633e-04
Loss = 1.5028e-01, PNorm = 73.5079, GNorm = 0.8134, lr_0 = 5.0598e-04
Loss = 1.4571e-01, PNorm = 73.5251, GNorm = 0.8716, lr_0 = 5.0564e-04
Loss = 1.3359e-01, PNorm = 73.5400, GNorm = 0.5831, lr_0 = 5.0529e-04
Loss = 1.2730e-01, PNorm = 73.5597, GNorm = 0.5048, lr_0 = 5.0494e-04
Loss = 1.4589e-01, PNorm = 73.5823, GNorm = 0.8139, lr_0 = 5.0460e-04
Loss = 1.3307e-01, PNorm = 73.5999, GNorm = 0.6658, lr_0 = 5.0425e-04
Loss = 1.2660e-01, PNorm = 73.6128, GNorm = 0.4444, lr_0 = 5.0391e-04
Loss = 1.3503e-01, PNorm = 73.6285, GNorm = 0.6694, lr_0 = 5.0356e-04
Loss = 1.4152e-01, PNorm = 73.6478, GNorm = 0.8793, lr_0 = 5.0322e-04
Loss = 1.3760e-01, PNorm = 73.6652, GNorm = 0.6535, lr_0 = 5.0287e-04
Loss = 1.4165e-01, PNorm = 73.6768, GNorm = 0.8599, lr_0 = 5.0253e-04
Loss = 1.1347e-01, PNorm = 73.6902, GNorm = 1.2613, lr_0 = 5.0218e-04
Loss = 1.3442e-01, PNorm = 73.7018, GNorm = 0.5588, lr_0 = 5.0184e-04
Loss = 1.2369e-01, PNorm = 73.7120, GNorm = 1.0323, lr_0 = 5.0150e-04
Loss = 1.1739e-01, PNorm = 73.7262, GNorm = 1.0581, lr_0 = 5.0115e-04
Loss = 1.1968e-01, PNorm = 73.7395, GNorm = 0.5278, lr_0 = 5.0081e-04
Loss = 1.4646e-01, PNorm = 73.7498, GNorm = 0.8045, lr_0 = 5.0047e-04
Loss = 1.2030e-01, PNorm = 73.7635, GNorm = 0.8545, lr_0 = 5.0012e-04
Loss = 1.3147e-01, PNorm = 73.7789, GNorm = 0.5217, lr_0 = 4.9978e-04
Loss = 1.2457e-01, PNorm = 73.7969, GNorm = 1.0227, lr_0 = 4.9944e-04
Loss = 1.2324e-01, PNorm = 73.8217, GNorm = 0.5938, lr_0 = 4.9910e-04
Loss = 1.6327e-01, PNorm = 73.8368, GNorm = 0.8923, lr_0 = 4.9875e-04
Loss = 1.2340e-01, PNorm = 73.8610, GNorm = 1.0429, lr_0 = 4.9841e-04
Loss = 1.3464e-01, PNorm = 73.8750, GNorm = 0.9226, lr_0 = 4.9807e-04
Loss = 1.4195e-01, PNorm = 73.8853, GNorm = 0.7434, lr_0 = 4.9773e-04
Loss = 1.2480e-01, PNorm = 73.9013, GNorm = 0.7234, lr_0 = 4.9739e-04
Loss = 1.5471e-01, PNorm = 73.9181, GNorm = 0.6321, lr_0 = 4.9705e-04
Loss = 1.3515e-01, PNorm = 73.9324, GNorm = 0.9768, lr_0 = 4.9671e-04
Loss = 1.3479e-01, PNorm = 73.9408, GNorm = 0.5287, lr_0 = 4.9637e-04
Loss = 1.3481e-01, PNorm = 73.9553, GNorm = 0.6872, lr_0 = 4.9603e-04
Loss = 1.4121e-01, PNorm = 73.9713, GNorm = 0.6249, lr_0 = 4.9569e-04
Loss = 1.3215e-01, PNorm = 73.9899, GNorm = 0.6855, lr_0 = 4.9535e-04
Loss = 1.5056e-01, PNorm = 74.0036, GNorm = 1.1123, lr_0 = 4.9501e-04
Loss = 1.2825e-01, PNorm = 74.0207, GNorm = 0.6161, lr_0 = 4.9467e-04
Loss = 1.2250e-01, PNorm = 74.0388, GNorm = 0.5993, lr_0 = 4.9433e-04
Loss = 1.2284e-01, PNorm = 74.0496, GNorm = 0.8605, lr_0 = 4.9399e-04
Loss = 1.3409e-01, PNorm = 74.0685, GNorm = 0.7272, lr_0 = 4.9365e-04
Loss = 1.3702e-01, PNorm = 74.0815, GNorm = 0.7941, lr_0 = 4.9332e-04
Loss = 1.2794e-01, PNorm = 74.0975, GNorm = 0.7568, lr_0 = 4.9298e-04
Loss = 1.1752e-01, PNorm = 74.1105, GNorm = 0.8190, lr_0 = 4.9264e-04
Loss = 1.2455e-01, PNorm = 74.1225, GNorm = 0.7877, lr_0 = 4.9230e-04
Loss = 1.7622e-01, PNorm = 74.1349, GNorm = 1.0561, lr_0 = 4.9197e-04
Loss = 1.3385e-01, PNorm = 74.1466, GNorm = 0.7459, lr_0 = 4.9163e-04
Loss = 1.4786e-01, PNorm = 74.1622, GNorm = 0.8267, lr_0 = 4.9129e-04
Loss = 1.5166e-01, PNorm = 74.1786, GNorm = 0.6896, lr_0 = 4.9095e-04
Loss = 1.4854e-01, PNorm = 74.1995, GNorm = 1.3814, lr_0 = 4.9062e-04
Loss = 1.3186e-01, PNorm = 74.2182, GNorm = 1.0736, lr_0 = 4.9028e-04
Loss = 1.2531e-01, PNorm = 74.2359, GNorm = 1.1445, lr_0 = 4.8995e-04
Loss = 1.2381e-01, PNorm = 74.2462, GNorm = 0.8251, lr_0 = 4.8961e-04
Loss = 1.4399e-01, PNorm = 74.2663, GNorm = 0.5641, lr_0 = 4.8928e-04
Loss = 1.3153e-01, PNorm = 74.2824, GNorm = 0.7296, lr_0 = 4.8894e-04
Loss = 1.2422e-01, PNorm = 74.3008, GNorm = 0.9179, lr_0 = 4.8861e-04
Loss = 1.3196e-01, PNorm = 74.3128, GNorm = 0.5273, lr_0 = 4.8827e-04
Loss = 1.2008e-01, PNorm = 74.3286, GNorm = 0.7647, lr_0 = 4.8794e-04
Loss = 1.2528e-01, PNorm = 74.3414, GNorm = 0.7155, lr_0 = 4.8760e-04
Loss = 1.4466e-01, PNorm = 74.3579, GNorm = 0.5862, lr_0 = 4.8727e-04
Loss = 1.3109e-01, PNorm = 74.3717, GNorm = 0.5774, lr_0 = 4.8693e-04
Loss = 1.2918e-01, PNorm = 74.3901, GNorm = 0.6556, lr_0 = 4.8660e-04
Loss = 1.1215e-01, PNorm = 74.4127, GNorm = 0.4569, lr_0 = 4.8627e-04
Loss = 1.4309e-01, PNorm = 74.4281, GNorm = 0.7400, lr_0 = 4.8593e-04
Loss = 1.4648e-01, PNorm = 74.4450, GNorm = 0.5975, lr_0 = 4.8560e-04
Loss = 1.5235e-01, PNorm = 74.4549, GNorm = 0.6044, lr_0 = 4.8527e-04
Loss = 1.3756e-01, PNorm = 74.4720, GNorm = 1.0538, lr_0 = 4.8494e-04
Loss = 1.4873e-01, PNorm = 74.4870, GNorm = 0.5692, lr_0 = 4.8460e-04
Loss = 1.2892e-01, PNorm = 74.5031, GNorm = 0.6121, lr_0 = 4.8427e-04
Loss = 1.3303e-01, PNorm = 74.5150, GNorm = 0.5503, lr_0 = 4.8394e-04
Loss = 1.4107e-01, PNorm = 74.5330, GNorm = 0.7863, lr_0 = 4.8361e-04
Loss = 1.4344e-01, PNorm = 74.5414, GNorm = 0.7191, lr_0 = 4.8328e-04
Loss = 1.3842e-01, PNorm = 74.5589, GNorm = 1.5299, lr_0 = 4.8295e-04
Loss = 1.4453e-01, PNorm = 74.5743, GNorm = 1.0118, lr_0 = 4.8262e-04
Loss = 1.3916e-01, PNorm = 74.5957, GNorm = 0.7820, lr_0 = 4.8228e-04
Loss = 1.4597e-01, PNorm = 74.6146, GNorm = 0.6001, lr_0 = 4.8195e-04
Loss = 1.3183e-01, PNorm = 74.6263, GNorm = 0.7085, lr_0 = 4.8162e-04
Loss = 1.4214e-01, PNorm = 74.6393, GNorm = 0.7765, lr_0 = 4.8129e-04
Loss = 1.6084e-01, PNorm = 74.6518, GNorm = 0.9797, lr_0 = 4.8096e-04
Loss = 1.4057e-01, PNorm = 74.6704, GNorm = 1.1479, lr_0 = 4.8064e-04
Loss = 1.3412e-01, PNorm = 74.6857, GNorm = 0.6028, lr_0 = 4.8031e-04
Loss = 1.2609e-01, PNorm = 74.6979, GNorm = 0.5746, lr_0 = 4.7998e-04
Loss = 1.2652e-01, PNorm = 74.7032, GNorm = 0.6636, lr_0 = 4.7965e-04
Loss = 1.3482e-01, PNorm = 74.7186, GNorm = 0.9688, lr_0 = 4.7932e-04
Loss = 1.4104e-01, PNorm = 74.7314, GNorm = 0.7216, lr_0 = 4.7899e-04
Loss = 1.3317e-01, PNorm = 74.7482, GNorm = 0.7333, lr_0 = 4.7866e-04
Loss = 1.2273e-01, PNorm = 74.7635, GNorm = 0.7050, lr_0 = 4.7833e-04
Loss = 1.2565e-01, PNorm = 74.7811, GNorm = 0.6339, lr_0 = 4.7801e-04
Loss = 1.3131e-01, PNorm = 74.7972, GNorm = 0.6117, lr_0 = 4.7768e-04
Loss = 1.5052e-01, PNorm = 74.8078, GNorm = 0.9040, lr_0 = 4.7735e-04
Loss = 1.5023e-01, PNorm = 74.8179, GNorm = 1.5460, lr_0 = 4.7703e-04
Validation mae = 0.238568
Epoch 11
Loss = 1.1959e-01, PNorm = 74.8311, GNorm = 0.7003, lr_0 = 4.7670e-04
Loss = 1.2226e-01, PNorm = 74.8434, GNorm = 0.5944, lr_0 = 4.7637e-04
Loss = 1.1267e-01, PNorm = 74.8534, GNorm = 0.6325, lr_0 = 4.7605e-04
Loss = 1.1653e-01, PNorm = 74.8659, GNorm = 0.6820, lr_0 = 4.7572e-04
Loss = 1.3457e-01, PNorm = 74.8747, GNorm = 0.9858, lr_0 = 4.7539e-04
Loss = 1.4609e-01, PNorm = 74.8957, GNorm = 0.8568, lr_0 = 4.7507e-04
Loss = 1.3178e-01, PNorm = 74.9138, GNorm = 0.6015, lr_0 = 4.7474e-04
Loss = 1.1521e-01, PNorm = 74.9271, GNorm = 0.8856, lr_0 = 4.7442e-04
Loss = 1.3088e-01, PNorm = 74.9373, GNorm = 0.5288, lr_0 = 4.7409e-04
Loss = 1.1949e-01, PNorm = 74.9563, GNorm = 0.6371, lr_0 = 4.7377e-04
Loss = 1.2381e-01, PNorm = 74.9698, GNorm = 0.5931, lr_0 = 4.7344e-04
Loss = 1.2848e-01, PNorm = 74.9838, GNorm = 0.6792, lr_0 = 4.7312e-04
Loss = 1.2180e-01, PNorm = 74.9989, GNorm = 0.7363, lr_0 = 4.7279e-04
Loss = 1.2350e-01, PNorm = 75.0186, GNorm = 0.9701, lr_0 = 4.7247e-04
Loss = 1.2078e-01, PNorm = 75.0334, GNorm = 1.1474, lr_0 = 4.7215e-04
Loss = 1.3137e-01, PNorm = 75.0514, GNorm = 0.8308, lr_0 = 4.7182e-04
Loss = 1.2476e-01, PNorm = 75.0642, GNorm = 0.7311, lr_0 = 4.7150e-04
Loss = 1.3655e-01, PNorm = 75.0794, GNorm = 1.2675, lr_0 = 4.7118e-04
Loss = 1.2135e-01, PNorm = 75.0970, GNorm = 0.5001, lr_0 = 4.7085e-04
Loss = 1.3427e-01, PNorm = 75.1109, GNorm = 1.0360, lr_0 = 4.7053e-04
Loss = 1.3702e-01, PNorm = 75.1230, GNorm = 0.7675, lr_0 = 4.7021e-04
Loss = 1.2911e-01, PNorm = 75.1342, GNorm = 0.6847, lr_0 = 4.6989e-04
Loss = 1.3134e-01, PNorm = 75.1485, GNorm = 1.4033, lr_0 = 4.6957e-04
Loss = 1.3828e-01, PNorm = 75.1574, GNorm = 0.6747, lr_0 = 4.6924e-04
Loss = 1.3838e-01, PNorm = 75.1713, GNorm = 1.2893, lr_0 = 4.6892e-04
Loss = 1.3373e-01, PNorm = 75.1872, GNorm = 0.6569, lr_0 = 4.6860e-04
Loss = 1.3057e-01, PNorm = 75.1995, GNorm = 0.9627, lr_0 = 4.6828e-04
Loss = 1.3002e-01, PNorm = 75.2135, GNorm = 0.6505, lr_0 = 4.6796e-04
Loss = 1.0639e-01, PNorm = 75.2260, GNorm = 0.7297, lr_0 = 4.6764e-04
Loss = 1.2370e-01, PNorm = 75.2375, GNorm = 0.7599, lr_0 = 4.6732e-04
Loss = 1.1958e-01, PNorm = 75.2484, GNorm = 0.7751, lr_0 = 4.6700e-04
Loss = 1.2092e-01, PNorm = 75.2628, GNorm = 0.7372, lr_0 = 4.6668e-04
Loss = 1.2835e-01, PNorm = 75.2801, GNorm = 0.5191, lr_0 = 4.6636e-04
Loss = 1.2032e-01, PNorm = 75.2953, GNorm = 0.9011, lr_0 = 4.6604e-04
Loss = 1.2021e-01, PNorm = 75.3021, GNorm = 0.8973, lr_0 = 4.6572e-04
Loss = 1.4038e-01, PNorm = 75.3165, GNorm = 0.7230, lr_0 = 4.6540e-04
Loss = 1.1823e-01, PNorm = 75.3283, GNorm = 1.1068, lr_0 = 4.6508e-04
Loss = 1.2293e-01, PNorm = 75.3420, GNorm = 0.4409, lr_0 = 4.6476e-04
Loss = 1.2788e-01, PNorm = 75.3610, GNorm = 0.5856, lr_0 = 4.6445e-04
Loss = 1.4158e-01, PNorm = 75.3761, GNorm = 0.6357, lr_0 = 4.6413e-04
Loss = 1.3680e-01, PNorm = 75.3854, GNorm = 2.1386, lr_0 = 4.6381e-04
Loss = 1.4478e-01, PNorm = 75.3999, GNorm = 0.6769, lr_0 = 4.6349e-04
Loss = 1.2429e-01, PNorm = 75.4192, GNorm = 1.0017, lr_0 = 4.6317e-04
Loss = 1.2241e-01, PNorm = 75.4302, GNorm = 0.6301, lr_0 = 4.6286e-04
Loss = 1.5665e-01, PNorm = 75.4426, GNorm = 0.6127, lr_0 = 4.6254e-04
Loss = 1.2100e-01, PNorm = 75.4560, GNorm = 0.5976, lr_0 = 4.6222e-04
Loss = 1.1463e-01, PNorm = 75.4687, GNorm = 0.7504, lr_0 = 4.6191e-04
Loss = 1.3900e-01, PNorm = 75.4825, GNorm = 0.7500, lr_0 = 4.6159e-04
Loss = 1.3623e-01, PNorm = 75.4903, GNorm = 0.5511, lr_0 = 4.6127e-04
Loss = 1.2718e-01, PNorm = 75.4997, GNorm = 0.7772, lr_0 = 4.6096e-04
Loss = 1.2790e-01, PNorm = 75.5098, GNorm = 0.8700, lr_0 = 4.6064e-04
Loss = 1.2535e-01, PNorm = 75.5202, GNorm = 0.5112, lr_0 = 4.6033e-04
Loss = 1.2536e-01, PNorm = 75.5338, GNorm = 0.6306, lr_0 = 4.6001e-04
Loss = 1.2657e-01, PNorm = 75.5436, GNorm = 0.5219, lr_0 = 4.5970e-04
Loss = 1.2302e-01, PNorm = 75.5577, GNorm = 0.5683, lr_0 = 4.5938e-04
Loss = 1.2430e-01, PNorm = 75.5733, GNorm = 0.8615, lr_0 = 4.5907e-04
Loss = 1.1883e-01, PNorm = 75.5857, GNorm = 0.6136, lr_0 = 4.5875e-04
Loss = 1.1384e-01, PNorm = 75.5980, GNorm = 0.6663, lr_0 = 4.5844e-04
Loss = 1.2100e-01, PNorm = 75.6102, GNorm = 0.6089, lr_0 = 4.5812e-04
Loss = 1.2464e-01, PNorm = 75.6218, GNorm = 0.5550, lr_0 = 4.5781e-04
Loss = 1.2389e-01, PNorm = 75.6355, GNorm = 0.7065, lr_0 = 4.5750e-04
Loss = 1.2299e-01, PNorm = 75.6459, GNorm = 0.8766, lr_0 = 4.5718e-04
Loss = 1.4449e-01, PNorm = 75.6586, GNorm = 0.8497, lr_0 = 4.5687e-04
Loss = 1.2113e-01, PNorm = 75.6763, GNorm = 0.7065, lr_0 = 4.5656e-04
Loss = 1.5401e-01, PNorm = 75.6841, GNorm = 0.6744, lr_0 = 4.5624e-04
Loss = 1.3626e-01, PNorm = 75.6978, GNorm = 0.8200, lr_0 = 4.5593e-04
Loss = 1.3156e-01, PNorm = 75.7082, GNorm = 0.7914, lr_0 = 4.5562e-04
Loss = 1.3985e-01, PNorm = 75.7249, GNorm = 0.7870, lr_0 = 4.5531e-04
Loss = 1.1595e-01, PNorm = 75.7400, GNorm = 0.5370, lr_0 = 4.5499e-04
Loss = 1.1219e-01, PNorm = 75.7547, GNorm = 0.9215, lr_0 = 4.5468e-04
Loss = 1.2259e-01, PNorm = 75.7626, GNorm = 0.6215, lr_0 = 4.5437e-04
Loss = 1.3363e-01, PNorm = 75.7735, GNorm = 0.9252, lr_0 = 4.5406e-04
Loss = 1.2873e-01, PNorm = 75.7859, GNorm = 0.7228, lr_0 = 4.5375e-04
Loss = 1.1117e-01, PNorm = 75.7990, GNorm = 0.7091, lr_0 = 4.5344e-04
Loss = 1.3745e-01, PNorm = 75.8155, GNorm = 0.8515, lr_0 = 4.5313e-04
Loss = 1.4366e-01, PNorm = 75.8323, GNorm = 0.7442, lr_0 = 4.5282e-04
Loss = 1.2643e-01, PNorm = 75.8466, GNorm = 0.6994, lr_0 = 4.5251e-04
Loss = 1.1722e-01, PNorm = 75.8576, GNorm = 0.9046, lr_0 = 4.5220e-04
Loss = 1.2439e-01, PNorm = 75.8757, GNorm = 0.9917, lr_0 = 4.5189e-04
Loss = 1.2985e-01, PNorm = 75.8938, GNorm = 0.6798, lr_0 = 4.5158e-04
Loss = 1.1394e-01, PNorm = 75.9062, GNorm = 0.6542, lr_0 = 4.5127e-04
Loss = 1.3859e-01, PNorm = 75.9231, GNorm = 0.6458, lr_0 = 4.5096e-04
Loss = 1.2769e-01, PNorm = 75.9360, GNorm = 0.7500, lr_0 = 4.5065e-04
Loss = 1.2551e-01, PNorm = 75.9533, GNorm = 0.4765, lr_0 = 4.5034e-04
Loss = 1.3384e-01, PNorm = 75.9668, GNorm = 1.1819, lr_0 = 4.5003e-04
Loss = 1.1976e-01, PNorm = 75.9735, GNorm = 0.6676, lr_0 = 4.4972e-04
Loss = 1.2494e-01, PNorm = 75.9836, GNorm = 1.2287, lr_0 = 4.4942e-04
Loss = 1.3897e-01, PNorm = 75.9925, GNorm = 1.0015, lr_0 = 4.4911e-04
Loss = 1.1277e-01, PNorm = 76.0026, GNorm = 0.6457, lr_0 = 4.4880e-04
Loss = 1.2177e-01, PNorm = 76.0097, GNorm = 0.7561, lr_0 = 4.4849e-04
Loss = 1.2269e-01, PNorm = 76.0179, GNorm = 0.6136, lr_0 = 4.4819e-04
Loss = 1.1698e-01, PNorm = 76.0318, GNorm = 1.0124, lr_0 = 4.4788e-04
Loss = 1.2219e-01, PNorm = 76.0447, GNorm = 0.7954, lr_0 = 4.4757e-04
Loss = 1.4021e-01, PNorm = 76.0621, GNorm = 0.6975, lr_0 = 4.4727e-04
Loss = 1.1053e-01, PNorm = 76.0785, GNorm = 0.6032, lr_0 = 4.4696e-04
Loss = 1.3638e-01, PNorm = 76.0960, GNorm = 0.6309, lr_0 = 4.4665e-04
Loss = 1.2599e-01, PNorm = 76.1093, GNorm = 0.9078, lr_0 = 4.4635e-04
Loss = 1.2893e-01, PNorm = 76.1217, GNorm = 0.8222, lr_0 = 4.4604e-04
Loss = 1.2270e-01, PNorm = 76.1311, GNorm = 0.6969, lr_0 = 4.4574e-04
Loss = 1.1947e-01, PNorm = 76.1432, GNorm = 0.8810, lr_0 = 4.4543e-04
Loss = 1.3804e-01, PNorm = 76.1473, GNorm = 0.7485, lr_0 = 4.4513e-04
Loss = 1.1968e-01, PNorm = 76.1615, GNorm = 0.5924, lr_0 = 4.4482e-04
Loss = 1.3390e-01, PNorm = 76.1731, GNorm = 1.3071, lr_0 = 4.4452e-04
Loss = 1.4218e-01, PNorm = 76.1911, GNorm = 0.5087, lr_0 = 4.4421e-04
Loss = 1.2781e-01, PNorm = 76.2043, GNorm = 0.8079, lr_0 = 4.4391e-04
Loss = 1.3078e-01, PNorm = 76.2131, GNorm = 0.6904, lr_0 = 4.4360e-04
Loss = 1.2414e-01, PNorm = 76.2212, GNorm = 0.5291, lr_0 = 4.4330e-04
Loss = 1.2087e-01, PNorm = 76.2353, GNorm = 1.1069, lr_0 = 4.4299e-04
Loss = 1.3615e-01, PNorm = 76.2488, GNorm = 0.8163, lr_0 = 4.4269e-04
Loss = 1.3183e-01, PNorm = 76.2626, GNorm = 1.0579, lr_0 = 4.4239e-04
Loss = 1.3973e-01, PNorm = 76.2754, GNorm = 0.8324, lr_0 = 4.4209e-04
Loss = 1.4316e-01, PNorm = 76.2878, GNorm = 0.6701, lr_0 = 4.4178e-04
Loss = 1.3140e-01, PNorm = 76.2964, GNorm = 0.6227, lr_0 = 4.4148e-04
Loss = 1.3372e-01, PNorm = 76.3064, GNorm = 0.4945, lr_0 = 4.4118e-04
Loss = 1.1566e-01, PNorm = 76.3195, GNorm = 0.5617, lr_0 = 4.4088e-04
Loss = 1.3671e-01, PNorm = 76.3370, GNorm = 0.5317, lr_0 = 4.4057e-04
Loss = 1.3229e-01, PNorm = 76.3463, GNorm = 0.7308, lr_0 = 4.4027e-04
Loss = 1.2944e-01, PNorm = 76.3561, GNorm = 0.5523, lr_0 = 4.3997e-04
Loss = 1.5280e-01, PNorm = 76.3640, GNorm = 0.9625, lr_0 = 4.3967e-04
Loss = 1.2007e-01, PNorm = 76.3752, GNorm = 0.4946, lr_0 = 4.3937e-04
Validation mae = 0.234751
Epoch 12
Loss = 1.1336e-01, PNorm = 76.3897, GNorm = 0.5012, lr_0 = 4.3907e-04
Loss = 1.2564e-01, PNorm = 76.4030, GNorm = 0.6476, lr_0 = 4.3877e-04
Loss = 1.3101e-01, PNorm = 76.4206, GNorm = 0.7627, lr_0 = 4.3846e-04
Loss = 1.2379e-01, PNorm = 76.4369, GNorm = 0.5241, lr_0 = 4.3816e-04
Loss = 1.1572e-01, PNorm = 76.4542, GNorm = 0.8163, lr_0 = 4.3786e-04
Loss = 1.2695e-01, PNorm = 76.4732, GNorm = 0.6282, lr_0 = 4.3756e-04
Loss = 1.2901e-01, PNorm = 76.4849, GNorm = 0.7425, lr_0 = 4.3726e-04
Loss = 1.3050e-01, PNorm = 76.4933, GNorm = 0.6119, lr_0 = 4.3696e-04
Loss = 1.1952e-01, PNorm = 76.5078, GNorm = 0.6771, lr_0 = 4.3667e-04
Loss = 1.1141e-01, PNorm = 76.5212, GNorm = 1.0093, lr_0 = 4.3637e-04
Loss = 1.1534e-01, PNorm = 76.5345, GNorm = 0.6335, lr_0 = 4.3607e-04
Loss = 1.2255e-01, PNorm = 76.5505, GNorm = 0.6999, lr_0 = 4.3577e-04
Loss = 1.0668e-01, PNorm = 76.5628, GNorm = 0.6220, lr_0 = 4.3547e-04
Loss = 1.1821e-01, PNorm = 76.5747, GNorm = 0.9162, lr_0 = 4.3517e-04
Loss = 1.2294e-01, PNorm = 76.5828, GNorm = 0.8972, lr_0 = 4.3487e-04
Loss = 1.2886e-01, PNorm = 76.5925, GNorm = 1.0948, lr_0 = 4.3458e-04
Loss = 1.2108e-01, PNorm = 76.6061, GNorm = 0.6073, lr_0 = 4.3428e-04
Loss = 1.1203e-01, PNorm = 76.6217, GNorm = 1.1915, lr_0 = 4.3398e-04
Loss = 1.3441e-01, PNorm = 76.6345, GNorm = 0.5469, lr_0 = 4.3368e-04
Loss = 1.1675e-01, PNorm = 76.6505, GNorm = 0.9714, lr_0 = 4.3339e-04
Loss = 1.0796e-01, PNorm = 76.6646, GNorm = 0.6631, lr_0 = 4.3309e-04
Loss = 1.2458e-01, PNorm = 76.6770, GNorm = 0.6400, lr_0 = 4.3279e-04
Loss = 1.3884e-01, PNorm = 76.6918, GNorm = 0.7338, lr_0 = 4.3250e-04
Loss = 1.1916e-01, PNorm = 76.7032, GNorm = 0.6860, lr_0 = 4.3220e-04
Loss = 1.2454e-01, PNorm = 76.7123, GNorm = 0.9395, lr_0 = 4.3190e-04
Loss = 1.1586e-01, PNorm = 76.7228, GNorm = 1.0462, lr_0 = 4.3161e-04
Loss = 1.2045e-01, PNorm = 76.7336, GNorm = 0.7211, lr_0 = 4.3131e-04
Loss = 1.2215e-01, PNorm = 76.7484, GNorm = 0.7280, lr_0 = 4.3102e-04
Loss = 1.2002e-01, PNorm = 76.7594, GNorm = 0.8463, lr_0 = 4.3072e-04
Loss = 1.1605e-01, PNorm = 76.7736, GNorm = 1.1248, lr_0 = 4.3043e-04
Loss = 1.2106e-01, PNorm = 76.7863, GNorm = 0.6928, lr_0 = 4.3013e-04
Loss = 1.1840e-01, PNorm = 76.8054, GNorm = 0.5773, lr_0 = 4.2984e-04
Loss = 1.1229e-01, PNorm = 76.8151, GNorm = 0.9442, lr_0 = 4.2954e-04
Loss = 1.1036e-01, PNorm = 76.8202, GNorm = 0.7934, lr_0 = 4.2925e-04
Loss = 1.2467e-01, PNorm = 76.8300, GNorm = 0.5990, lr_0 = 4.2895e-04
Loss = 1.1816e-01, PNorm = 76.8449, GNorm = 0.6263, lr_0 = 4.2866e-04
Loss = 1.1887e-01, PNorm = 76.8562, GNorm = 0.7362, lr_0 = 4.2837e-04
Loss = 1.2556e-01, PNorm = 76.8714, GNorm = 0.5803, lr_0 = 4.2807e-04
Loss = 1.2126e-01, PNorm = 76.8892, GNorm = 0.5768, lr_0 = 4.2778e-04
Loss = 1.1115e-01, PNorm = 76.9005, GNorm = 0.4564, lr_0 = 4.2749e-04
Loss = 1.1948e-01, PNorm = 76.9153, GNorm = 0.5460, lr_0 = 4.2719e-04
Loss = 1.1695e-01, PNorm = 76.9274, GNorm = 0.6072, lr_0 = 4.2690e-04
Loss = 1.0298e-01, PNorm = 76.9362, GNorm = 0.7773, lr_0 = 4.2661e-04
Loss = 1.2144e-01, PNorm = 76.9464, GNorm = 0.5761, lr_0 = 4.2632e-04
Loss = 1.2510e-01, PNorm = 76.9579, GNorm = 0.6964, lr_0 = 4.2602e-04
Loss = 1.4301e-01, PNorm = 76.9707, GNorm = 0.6984, lr_0 = 4.2573e-04
Loss = 1.3887e-01, PNorm = 76.9778, GNorm = 1.1809, lr_0 = 4.2544e-04
Loss = 1.0507e-01, PNorm = 76.9866, GNorm = 0.5352, lr_0 = 4.2515e-04
Loss = 1.1698e-01, PNorm = 77.0035, GNorm = 0.6037, lr_0 = 4.2486e-04
Loss = 1.2110e-01, PNorm = 77.0100, GNorm = 0.7801, lr_0 = 4.2457e-04
Loss = 1.2701e-01, PNorm = 77.0210, GNorm = 0.4590, lr_0 = 4.2428e-04
Loss = 1.1182e-01, PNorm = 77.0305, GNorm = 0.5416, lr_0 = 4.2399e-04
Loss = 1.2022e-01, PNorm = 77.0357, GNorm = 0.5584, lr_0 = 4.2370e-04
Loss = 1.3185e-01, PNorm = 77.0486, GNorm = 0.6477, lr_0 = 4.2340e-04
Loss = 1.2653e-01, PNorm = 77.0591, GNorm = 0.9730, lr_0 = 4.2311e-04
Loss = 1.2122e-01, PNorm = 77.0676, GNorm = 0.6490, lr_0 = 4.2283e-04
Loss = 1.1535e-01, PNorm = 77.0824, GNorm = 0.5996, lr_0 = 4.2254e-04
Loss = 1.0984e-01, PNorm = 77.0966, GNorm = 0.6282, lr_0 = 4.2225e-04
Loss = 1.1600e-01, PNorm = 77.1095, GNorm = 0.9392, lr_0 = 4.2196e-04
Loss = 1.2813e-01, PNorm = 77.1180, GNorm = 1.0009, lr_0 = 4.2167e-04
Loss = 1.2614e-01, PNorm = 77.1301, GNorm = 0.7914, lr_0 = 4.2138e-04
Loss = 1.1181e-01, PNorm = 77.1429, GNorm = 0.8252, lr_0 = 4.2109e-04
Loss = 1.2540e-01, PNorm = 77.1538, GNorm = 0.5491, lr_0 = 4.2080e-04
Loss = 1.1339e-01, PNorm = 77.1621, GNorm = 0.7744, lr_0 = 4.2051e-04
Loss = 1.2195e-01, PNorm = 77.1725, GNorm = 0.5190, lr_0 = 4.2023e-04
Loss = 1.2541e-01, PNorm = 77.1886, GNorm = 1.0602, lr_0 = 4.1994e-04
Loss = 1.2131e-01, PNorm = 77.1977, GNorm = 0.6693, lr_0 = 4.1965e-04
Loss = 1.1443e-01, PNorm = 77.2062, GNorm = 0.5629, lr_0 = 4.1936e-04
Loss = 1.1048e-01, PNorm = 77.2141, GNorm = 0.5649, lr_0 = 4.1907e-04
Loss = 1.2126e-01, PNorm = 77.2256, GNorm = 0.7029, lr_0 = 4.1879e-04
Loss = 1.2676e-01, PNorm = 77.2367, GNorm = 0.8573, lr_0 = 4.1850e-04
Loss = 1.1766e-01, PNorm = 77.2438, GNorm = 0.8896, lr_0 = 4.1821e-04
Loss = 1.1801e-01, PNorm = 77.2620, GNorm = 0.7982, lr_0 = 4.1793e-04
Loss = 1.2900e-01, PNorm = 77.2752, GNorm = 0.6813, lr_0 = 4.1764e-04
Loss = 1.1350e-01, PNorm = 77.2862, GNorm = 1.1298, lr_0 = 4.1736e-04
Loss = 1.2095e-01, PNorm = 77.2978, GNorm = 1.1480, lr_0 = 4.1707e-04
Loss = 1.1794e-01, PNorm = 77.3098, GNorm = 0.6149, lr_0 = 4.1678e-04
Loss = 1.2248e-01, PNorm = 77.3207, GNorm = 0.6478, lr_0 = 4.1650e-04
Loss = 1.1434e-01, PNorm = 77.3310, GNorm = 0.6692, lr_0 = 4.1621e-04
Loss = 1.3189e-01, PNorm = 77.3397, GNorm = 0.9748, lr_0 = 4.1593e-04
Loss = 1.1979e-01, PNorm = 77.3511, GNorm = 0.6641, lr_0 = 4.1564e-04
Loss = 1.2308e-01, PNorm = 77.3652, GNorm = 0.7092, lr_0 = 4.1536e-04
Loss = 1.3914e-01, PNorm = 77.3770, GNorm = 0.6813, lr_0 = 4.1507e-04
Loss = 1.1471e-01, PNorm = 77.3872, GNorm = 0.7235, lr_0 = 4.1479e-04
Loss = 1.5015e-01, PNorm = 77.3949, GNorm = 0.6272, lr_0 = 4.1450e-04
Loss = 1.1956e-01, PNorm = 77.4063, GNorm = 0.8176, lr_0 = 4.1422e-04
Loss = 1.2769e-01, PNorm = 77.4178, GNorm = 0.7215, lr_0 = 4.1394e-04
Loss = 1.1296e-01, PNorm = 77.4315, GNorm = 0.4621, lr_0 = 4.1365e-04
Loss = 1.1606e-01, PNorm = 77.4411, GNorm = 0.5585, lr_0 = 4.1337e-04
Loss = 1.2462e-01, PNorm = 77.4559, GNorm = 0.7207, lr_0 = 4.1309e-04
Loss = 1.0699e-01, PNorm = 77.4591, GNorm = 0.6474, lr_0 = 4.1280e-04
Loss = 1.2822e-01, PNorm = 77.4621, GNorm = 0.6159, lr_0 = 4.1252e-04
Loss = 1.1928e-01, PNorm = 77.4711, GNorm = 0.6479, lr_0 = 4.1224e-04
Loss = 1.2184e-01, PNorm = 77.4818, GNorm = 0.8144, lr_0 = 4.1196e-04
Loss = 1.4883e-01, PNorm = 77.4910, GNorm = 0.7421, lr_0 = 4.1167e-04
Loss = 1.2068e-01, PNorm = 77.5009, GNorm = 0.6198, lr_0 = 4.1139e-04
Loss = 1.2532e-01, PNorm = 77.5122, GNorm = 0.9112, lr_0 = 4.1111e-04
Loss = 1.1956e-01, PNorm = 77.5231, GNorm = 0.6259, lr_0 = 4.1083e-04
Loss = 1.2108e-01, PNorm = 77.5324, GNorm = 0.8989, lr_0 = 4.1055e-04
Loss = 1.1103e-01, PNorm = 77.5440, GNorm = 0.5304, lr_0 = 4.1027e-04
Loss = 1.2111e-01, PNorm = 77.5538, GNorm = 0.6720, lr_0 = 4.0998e-04
Loss = 1.2404e-01, PNorm = 77.5644, GNorm = 0.9623, lr_0 = 4.0970e-04
Loss = 1.3730e-01, PNorm = 77.5710, GNorm = 0.7802, lr_0 = 4.0942e-04
Loss = 1.3265e-01, PNorm = 77.5790, GNorm = 0.6966, lr_0 = 4.0914e-04
Loss = 1.3768e-01, PNorm = 77.5899, GNorm = 0.6922, lr_0 = 4.0886e-04
Loss = 1.2004e-01, PNorm = 77.6051, GNorm = 0.6296, lr_0 = 4.0858e-04
Loss = 1.2327e-01, PNorm = 77.6181, GNorm = 0.6378, lr_0 = 4.0830e-04
Loss = 1.2853e-01, PNorm = 77.6289, GNorm = 1.0129, lr_0 = 4.0802e-04
Loss = 1.1417e-01, PNorm = 77.6396, GNorm = 0.8626, lr_0 = 4.0774e-04
Loss = 1.1901e-01, PNorm = 77.6516, GNorm = 0.8826, lr_0 = 4.0746e-04
Loss = 1.0697e-01, PNorm = 77.6596, GNorm = 0.8788, lr_0 = 4.0718e-04
Loss = 1.5306e-01, PNorm = 77.6695, GNorm = 0.7619, lr_0 = 4.0691e-04
Loss = 1.4644e-01, PNorm = 77.6848, GNorm = 0.7497, lr_0 = 4.0663e-04
Loss = 1.2439e-01, PNorm = 77.6980, GNorm = 0.8135, lr_0 = 4.0635e-04
Loss = 1.2742e-01, PNorm = 77.7027, GNorm = 0.8859, lr_0 = 4.0607e-04
Loss = 1.1965e-01, PNorm = 77.7144, GNorm = 0.6524, lr_0 = 4.0579e-04
Loss = 1.3971e-01, PNorm = 77.7249, GNorm = 0.7247, lr_0 = 4.0551e-04
Loss = 1.3451e-01, PNorm = 77.7320, GNorm = 0.7063, lr_0 = 4.0524e-04
Loss = 1.2195e-01, PNorm = 77.7393, GNorm = 0.5717, lr_0 = 4.0496e-04
Loss = 1.2396e-01, PNorm = 77.7491, GNorm = 0.6444, lr_0 = 4.0468e-04
Validation mae = 0.234076
Epoch 13
Loss = 1.1543e-01, PNorm = 77.7579, GNorm = 0.7479, lr_0 = 4.0440e-04
Loss = 1.2087e-01, PNorm = 77.7700, GNorm = 0.7345, lr_0 = 4.0413e-04
Loss = 1.0424e-01, PNorm = 77.7815, GNorm = 0.6550, lr_0 = 4.0385e-04
Loss = 1.2515e-01, PNorm = 77.7917, GNorm = 0.7558, lr_0 = 4.0357e-04
Loss = 1.0984e-01, PNorm = 77.8043, GNorm = 0.9523, lr_0 = 4.0330e-04
Loss = 9.7891e-02, PNorm = 77.8098, GNorm = 0.7250, lr_0 = 4.0302e-04
Loss = 1.0787e-01, PNorm = 77.8187, GNorm = 0.8652, lr_0 = 4.0274e-04
Loss = 1.2917e-01, PNorm = 77.8284, GNorm = 0.6080, lr_0 = 4.0247e-04
Loss = 9.7435e-02, PNorm = 77.8339, GNorm = 0.7126, lr_0 = 4.0219e-04
Loss = 1.1915e-01, PNorm = 77.8420, GNorm = 0.9654, lr_0 = 4.0192e-04
Loss = 1.1980e-01, PNorm = 77.8551, GNorm = 0.8395, lr_0 = 4.0164e-04
Loss = 1.1068e-01, PNorm = 77.8706, GNorm = 0.4120, lr_0 = 4.0137e-04
Loss = 1.0438e-01, PNorm = 77.8829, GNorm = 0.7030, lr_0 = 4.0109e-04
Loss = 1.1155e-01, PNorm = 77.8878, GNorm = 0.9962, lr_0 = 4.0082e-04
Loss = 1.2414e-01, PNorm = 77.8958, GNorm = 0.7177, lr_0 = 4.0054e-04
Loss = 1.0879e-01, PNorm = 77.9028, GNorm = 0.7341, lr_0 = 4.0027e-04
Loss = 1.2133e-01, PNorm = 77.9164, GNorm = 0.6224, lr_0 = 3.9999e-04
Loss = 1.1933e-01, PNorm = 77.9277, GNorm = 0.9908, lr_0 = 3.9972e-04
Loss = 1.1581e-01, PNorm = 77.9421, GNorm = 0.6351, lr_0 = 3.9945e-04
Loss = 1.0779e-01, PNorm = 77.9554, GNorm = 0.6758, lr_0 = 3.9917e-04
Loss = 1.2605e-01, PNorm = 77.9700, GNorm = 1.2215, lr_0 = 3.9890e-04
Loss = 1.1285e-01, PNorm = 77.9789, GNorm = 0.8409, lr_0 = 3.9863e-04
Loss = 1.2567e-01, PNorm = 77.9888, GNorm = 0.6797, lr_0 = 3.9835e-04
Loss = 1.1495e-01, PNorm = 77.9969, GNorm = 0.7000, lr_0 = 3.9808e-04
Loss = 1.0559e-01, PNorm = 78.0118, GNorm = 0.7056, lr_0 = 3.9781e-04
Loss = 1.1265e-01, PNorm = 78.0252, GNorm = 1.1429, lr_0 = 3.9753e-04
Loss = 1.1839e-01, PNorm = 78.0377, GNorm = 0.6720, lr_0 = 3.9726e-04
Loss = 1.2365e-01, PNorm = 78.0489, GNorm = 0.8025, lr_0 = 3.9699e-04
Loss = 1.1995e-01, PNorm = 78.0587, GNorm = 0.5718, lr_0 = 3.9672e-04
Loss = 1.1664e-01, PNorm = 78.0668, GNorm = 0.9439, lr_0 = 3.9645e-04
Loss = 1.1227e-01, PNorm = 78.0768, GNorm = 0.6277, lr_0 = 3.9617e-04
Loss = 1.1615e-01, PNorm = 78.0882, GNorm = 1.4627, lr_0 = 3.9590e-04
Loss = 1.1987e-01, PNorm = 78.0989, GNorm = 0.6762, lr_0 = 3.9563e-04
Loss = 9.5218e-02, PNorm = 78.1104, GNorm = 0.7135, lr_0 = 3.9536e-04
Loss = 1.1074e-01, PNorm = 78.1193, GNorm = 0.4811, lr_0 = 3.9509e-04
Loss = 1.2532e-01, PNorm = 78.1217, GNorm = 0.7233, lr_0 = 3.9482e-04
Loss = 1.1875e-01, PNorm = 78.1293, GNorm = 0.5326, lr_0 = 3.9455e-04
Loss = 1.1268e-01, PNorm = 78.1400, GNorm = 0.6769, lr_0 = 3.9428e-04
Loss = 1.1125e-01, PNorm = 78.1494, GNorm = 0.8038, lr_0 = 3.9401e-04
Loss = 1.0441e-01, PNorm = 78.1578, GNorm = 0.5474, lr_0 = 3.9374e-04
Loss = 1.0758e-01, PNorm = 78.1683, GNorm = 0.5934, lr_0 = 3.9347e-04
Loss = 1.2188e-01, PNorm = 78.1765, GNorm = 0.5815, lr_0 = 3.9320e-04
Loss = 1.2304e-01, PNorm = 78.1856, GNorm = 0.7369, lr_0 = 3.9293e-04
Loss = 1.0879e-01, PNorm = 78.1901, GNorm = 0.6636, lr_0 = 3.9266e-04
Loss = 9.6027e-02, PNorm = 78.1987, GNorm = 0.6604, lr_0 = 3.9239e-04
Loss = 1.0131e-01, PNorm = 78.2062, GNorm = 0.5547, lr_0 = 3.9212e-04
Loss = 1.2799e-01, PNorm = 78.2171, GNorm = 0.7983, lr_0 = 3.9185e-04
Loss = 1.1557e-01, PNorm = 78.2292, GNorm = 0.5713, lr_0 = 3.9159e-04
Loss = 1.0722e-01, PNorm = 78.2403, GNorm = 0.5799, lr_0 = 3.9132e-04
Loss = 1.1828e-01, PNorm = 78.2494, GNorm = 0.8477, lr_0 = 3.9105e-04
Loss = 1.4566e-01, PNorm = 78.2591, GNorm = 0.8326, lr_0 = 3.9078e-04
Loss = 1.1410e-01, PNorm = 78.2688, GNorm = 0.7512, lr_0 = 3.9051e-04
Loss = 1.1240e-01, PNorm = 78.2831, GNorm = 0.5547, lr_0 = 3.9025e-04
Loss = 1.2414e-01, PNorm = 78.2967, GNorm = 0.6124, lr_0 = 3.8998e-04
Loss = 1.0740e-01, PNorm = 78.3091, GNorm = 0.6019, lr_0 = 3.8971e-04
Loss = 1.3165e-01, PNorm = 78.3179, GNorm = 0.9329, lr_0 = 3.8945e-04
Loss = 1.0802e-01, PNorm = 78.3321, GNorm = 0.6649, lr_0 = 3.8918e-04
Loss = 1.1879e-01, PNorm = 78.3411, GNorm = 0.6419, lr_0 = 3.8891e-04
Loss = 1.1212e-01, PNorm = 78.3559, GNorm = 0.5779, lr_0 = 3.8865e-04
Loss = 1.1025e-01, PNorm = 78.3659, GNorm = 0.5094, lr_0 = 3.8838e-04
Loss = 1.2180e-01, PNorm = 78.3706, GNorm = 0.8670, lr_0 = 3.8811e-04
Loss = 1.2757e-01, PNorm = 78.3758, GNorm = 0.8866, lr_0 = 3.8785e-04
Loss = 1.1293e-01, PNorm = 78.3876, GNorm = 0.6252, lr_0 = 3.8758e-04
Loss = 1.1447e-01, PNorm = 78.3934, GNorm = 0.7170, lr_0 = 3.8732e-04
Loss = 1.2807e-01, PNorm = 78.3988, GNorm = 0.7196, lr_0 = 3.8705e-04
Loss = 1.1304e-01, PNorm = 78.4096, GNorm = 0.7397, lr_0 = 3.8679e-04
Loss = 1.2192e-01, PNorm = 78.4198, GNorm = 0.5156, lr_0 = 3.8652e-04
Loss = 1.2050e-01, PNorm = 78.4277, GNorm = 0.6938, lr_0 = 3.8626e-04
Loss = 1.1571e-01, PNorm = 78.4370, GNorm = 0.6489, lr_0 = 3.8599e-04
Loss = 1.1575e-01, PNorm = 78.4447, GNorm = 0.7440, lr_0 = 3.8573e-04
Loss = 1.1406e-01, PNorm = 78.4559, GNorm = 0.9029, lr_0 = 3.8546e-04
Loss = 1.1979e-01, PNorm = 78.4625, GNorm = 0.6844, lr_0 = 3.8520e-04
Loss = 1.0175e-01, PNorm = 78.4683, GNorm = 0.7855, lr_0 = 3.8493e-04
Loss = 1.2355e-01, PNorm = 78.4803, GNorm = 1.0447, lr_0 = 3.8467e-04
Loss = 1.2403e-01, PNorm = 78.4920, GNorm = 1.1237, lr_0 = 3.8441e-04
Loss = 1.1141e-01, PNorm = 78.5000, GNorm = 0.8380, lr_0 = 3.8414e-04
Loss = 1.2584e-01, PNorm = 78.5069, GNorm = 0.9809, lr_0 = 3.8388e-04
Loss = 1.2031e-01, PNorm = 78.5157, GNorm = 0.6026, lr_0 = 3.8362e-04
Loss = 1.0571e-01, PNorm = 78.5255, GNorm = 0.8929, lr_0 = 3.8336e-04
Loss = 1.1601e-01, PNorm = 78.5390, GNorm = 0.6529, lr_0 = 3.8309e-04
Loss = 1.0178e-01, PNorm = 78.5510, GNorm = 0.5617, lr_0 = 3.8283e-04
Loss = 1.2741e-01, PNorm = 78.5591, GNorm = 0.6872, lr_0 = 3.8257e-04
Loss = 1.1757e-01, PNorm = 78.5697, GNorm = 0.7061, lr_0 = 3.8231e-04
Loss = 1.1877e-01, PNorm = 78.5833, GNorm = 0.9846, lr_0 = 3.8204e-04
Loss = 1.2538e-01, PNorm = 78.5935, GNorm = 0.5716, lr_0 = 3.8178e-04
Loss = 1.0878e-01, PNorm = 78.6062, GNorm = 0.5867, lr_0 = 3.8152e-04
Loss = 1.2308e-01, PNorm = 78.6158, GNorm = 1.1234, lr_0 = 3.8126e-04
Loss = 1.0149e-01, PNorm = 78.6223, GNorm = 0.6244, lr_0 = 3.8100e-04
Loss = 1.1333e-01, PNorm = 78.6273, GNorm = 0.6388, lr_0 = 3.8074e-04
Loss = 1.1435e-01, PNorm = 78.6352, GNorm = 0.6035, lr_0 = 3.8048e-04
Loss = 1.4065e-01, PNorm = 78.6440, GNorm = 0.7758, lr_0 = 3.8022e-04
Loss = 1.1141e-01, PNorm = 78.6542, GNorm = 0.5725, lr_0 = 3.7995e-04
Loss = 1.1544e-01, PNorm = 78.6605, GNorm = 0.5601, lr_0 = 3.7969e-04
Loss = 1.0940e-01, PNorm = 78.6698, GNorm = 0.7019, lr_0 = 3.7943e-04
Loss = 1.2489e-01, PNorm = 78.6815, GNorm = 0.7069, lr_0 = 3.7917e-04
Loss = 1.0961e-01, PNorm = 78.6915, GNorm = 0.8932, lr_0 = 3.7891e-04
Loss = 1.2008e-01, PNorm = 78.7057, GNorm = 0.6555, lr_0 = 3.7866e-04
Loss = 1.4431e-01, PNorm = 78.7201, GNorm = 1.7519, lr_0 = 3.7840e-04
Loss = 9.9607e-02, PNorm = 78.7313, GNorm = 0.6021, lr_0 = 3.7814e-04
Loss = 1.2217e-01, PNorm = 78.7465, GNorm = 0.8390, lr_0 = 3.7788e-04
Loss = 1.2084e-01, PNorm = 78.7579, GNorm = 0.8682, lr_0 = 3.7762e-04
Loss = 1.0775e-01, PNorm = 78.7665, GNorm = 0.6881, lr_0 = 3.7736e-04
Loss = 1.2586e-01, PNorm = 78.7751, GNorm = 0.5945, lr_0 = 3.7710e-04
Loss = 1.1953e-01, PNorm = 78.7829, GNorm = 0.8103, lr_0 = 3.7684e-04
Loss = 1.3104e-01, PNorm = 78.7969, GNorm = 0.9135, lr_0 = 3.7659e-04
Loss = 1.1343e-01, PNorm = 78.8088, GNorm = 0.6285, lr_0 = 3.7633e-04
Loss = 1.1022e-01, PNorm = 78.8185, GNorm = 0.6313, lr_0 = 3.7607e-04
Loss = 1.2705e-01, PNorm = 78.8218, GNorm = 1.1459, lr_0 = 3.7581e-04
Loss = 1.1273e-01, PNorm = 78.8290, GNorm = 0.6137, lr_0 = 3.7555e-04
Loss = 1.1105e-01, PNorm = 78.8394, GNorm = 0.6760, lr_0 = 3.7530e-04
Loss = 1.2431e-01, PNorm = 78.8454, GNorm = 0.6791, lr_0 = 3.7504e-04
Loss = 1.2803e-01, PNorm = 78.8581, GNorm = 0.9712, lr_0 = 3.7478e-04
Loss = 1.5022e-01, PNorm = 78.8649, GNorm = 0.5572, lr_0 = 3.7453e-04
Loss = 1.0853e-01, PNorm = 78.8766, GNorm = 0.9457, lr_0 = 3.7427e-04
Loss = 1.1053e-01, PNorm = 78.8858, GNorm = 0.6241, lr_0 = 3.7401e-04
Loss = 9.7830e-02, PNorm = 78.8988, GNorm = 0.8099, lr_0 = 3.7376e-04
Loss = 1.1048e-01, PNorm = 78.9064, GNorm = 0.5240, lr_0 = 3.7350e-04
Loss = 1.3281e-01, PNorm = 78.9148, GNorm = 0.7694, lr_0 = 3.7325e-04
Loss = 1.0598e-01, PNorm = 78.9178, GNorm = 0.7292, lr_0 = 3.7299e-04
Loss = 1.2810e-01, PNorm = 78.9253, GNorm = 0.8733, lr_0 = 3.7273e-04
Validation mae = 0.230074
Epoch 14
Loss = 1.1565e-01, PNorm = 78.9379, GNorm = 1.5141, lr_0 = 3.7248e-04
Loss = 1.2285e-01, PNorm = 78.9509, GNorm = 0.6309, lr_0 = 3.7222e-04
Loss = 1.0748e-01, PNorm = 78.9654, GNorm = 0.6507, lr_0 = 3.7197e-04
Loss = 9.9753e-02, PNorm = 78.9796, GNorm = 0.5294, lr_0 = 3.7171e-04
Loss = 1.1482e-01, PNorm = 78.9942, GNorm = 0.7716, lr_0 = 3.7146e-04
Loss = 1.2685e-01, PNorm = 79.0068, GNorm = 0.5803, lr_0 = 3.7120e-04
Loss = 1.0757e-01, PNorm = 79.0204, GNorm = 0.6187, lr_0 = 3.7095e-04
Loss = 9.7394e-02, PNorm = 79.0344, GNorm = 0.6438, lr_0 = 3.7070e-04
Loss = 1.0124e-01, PNorm = 79.0446, GNorm = 1.0884, lr_0 = 3.7044e-04
Loss = 1.1396e-01, PNorm = 79.0535, GNorm = 1.0742, lr_0 = 3.7019e-04
Loss = 1.0347e-01, PNorm = 79.0669, GNorm = 0.8623, lr_0 = 3.6993e-04
Loss = 1.0470e-01, PNorm = 79.0786, GNorm = 0.6152, lr_0 = 3.6968e-04
Loss = 1.0545e-01, PNorm = 79.0822, GNorm = 0.6404, lr_0 = 3.6943e-04
Loss = 1.1577e-01, PNorm = 79.0873, GNorm = 0.6366, lr_0 = 3.6917e-04
Loss = 1.0703e-01, PNorm = 79.0946, GNorm = 0.6223, lr_0 = 3.6892e-04
Loss = 1.0742e-01, PNorm = 79.1016, GNorm = 1.2030, lr_0 = 3.6867e-04
Loss = 1.1402e-01, PNorm = 79.1104, GNorm = 0.7767, lr_0 = 3.6842e-04
Loss = 1.2767e-01, PNorm = 79.1205, GNorm = 1.0520, lr_0 = 3.6816e-04
Loss = 1.0707e-01, PNorm = 79.1322, GNorm = 0.5363, lr_0 = 3.6791e-04
Loss = 1.0329e-01, PNorm = 79.1438, GNorm = 0.5156, lr_0 = 3.6766e-04
Loss = 9.9027e-02, PNorm = 79.1503, GNorm = 0.8430, lr_0 = 3.6741e-04
Loss = 1.2273e-01, PNorm = 79.1570, GNorm = 0.8735, lr_0 = 3.6716e-04
Loss = 1.1874e-01, PNorm = 79.1659, GNorm = 0.8678, lr_0 = 3.6690e-04
Loss = 1.1124e-01, PNorm = 79.1739, GNorm = 0.9293, lr_0 = 3.6665e-04
Loss = 1.1255e-01, PNorm = 79.1845, GNorm = 0.6113, lr_0 = 3.6640e-04
Loss = 1.0465e-01, PNorm = 79.1954, GNorm = 0.7364, lr_0 = 3.6615e-04
Loss = 1.0524e-01, PNorm = 79.2003, GNorm = 0.4958, lr_0 = 3.6590e-04
Loss = 1.0901e-01, PNorm = 79.2120, GNorm = 1.0894, lr_0 = 3.6565e-04
Loss = 1.0984e-01, PNorm = 79.2193, GNorm = 0.5811, lr_0 = 3.6540e-04
Loss = 1.0802e-01, PNorm = 79.2343, GNorm = 0.6776, lr_0 = 3.6515e-04
Loss = 1.1861e-01, PNorm = 79.2431, GNorm = 0.6675, lr_0 = 3.6490e-04
Loss = 1.2337e-01, PNorm = 79.2536, GNorm = 0.6404, lr_0 = 3.6465e-04
Loss = 1.0553e-01, PNorm = 79.2636, GNorm = 0.6709, lr_0 = 3.6440e-04
Loss = 1.2375e-01, PNorm = 79.2715, GNorm = 0.7076, lr_0 = 3.6415e-04
Loss = 1.1044e-01, PNorm = 79.2835, GNorm = 0.6871, lr_0 = 3.6390e-04
Loss = 1.0832e-01, PNorm = 79.2913, GNorm = 0.6295, lr_0 = 3.6365e-04
Loss = 1.1271e-01, PNorm = 79.2984, GNorm = 0.5648, lr_0 = 3.6340e-04
Loss = 1.0752e-01, PNorm = 79.3093, GNorm = 0.7095, lr_0 = 3.6315e-04
Loss = 9.9910e-02, PNorm = 79.3161, GNorm = 0.4514, lr_0 = 3.6290e-04
Loss = 1.1804e-01, PNorm = 79.3247, GNorm = 0.6293, lr_0 = 3.6266e-04
Loss = 1.1807e-01, PNorm = 79.3323, GNorm = 0.7579, lr_0 = 3.6241e-04
Loss = 1.0621e-01, PNorm = 79.3388, GNorm = 0.6628, lr_0 = 3.6216e-04
Loss = 1.1226e-01, PNorm = 79.3479, GNorm = 0.5756, lr_0 = 3.6191e-04
Loss = 1.1902e-01, PNorm = 79.3590, GNorm = 0.7519, lr_0 = 3.6166e-04
Loss = 1.1361e-01, PNorm = 79.3705, GNorm = 0.5726, lr_0 = 3.6141e-04
Loss = 1.2249e-01, PNorm = 79.3795, GNorm = 0.5203, lr_0 = 3.6117e-04
Loss = 1.1296e-01, PNorm = 79.3884, GNorm = 0.6026, lr_0 = 3.6092e-04
Loss = 1.2931e-01, PNorm = 79.3962, GNorm = 0.6771, lr_0 = 3.6067e-04
Loss = 1.0204e-01, PNorm = 79.4057, GNorm = 0.7126, lr_0 = 3.6043e-04
Loss = 1.0823e-01, PNorm = 79.4136, GNorm = 0.6134, lr_0 = 3.6018e-04
Loss = 1.0869e-01, PNorm = 79.4228, GNorm = 0.7289, lr_0 = 3.5993e-04
Loss = 1.2747e-01, PNorm = 79.4275, GNorm = 0.5560, lr_0 = 3.5969e-04
Loss = 9.8150e-02, PNorm = 79.4377, GNorm = 0.6115, lr_0 = 3.5944e-04
Loss = 1.1026e-01, PNorm = 79.4477, GNorm = 0.9091, lr_0 = 3.5919e-04
Loss = 1.1843e-01, PNorm = 79.4557, GNorm = 0.7497, lr_0 = 3.5895e-04
Loss = 1.1469e-01, PNorm = 79.4661, GNorm = 0.4392, lr_0 = 3.5870e-04
Loss = 1.0404e-01, PNorm = 79.4737, GNorm = 0.8064, lr_0 = 3.5845e-04
Loss = 1.0146e-01, PNorm = 79.4771, GNorm = 0.4990, lr_0 = 3.5821e-04
Loss = 1.2122e-01, PNorm = 79.4851, GNorm = 0.7664, lr_0 = 3.5796e-04
Loss = 1.1233e-01, PNorm = 79.4961, GNorm = 0.6347, lr_0 = 3.5772e-04
Loss = 1.2650e-01, PNorm = 79.5041, GNorm = 0.7537, lr_0 = 3.5747e-04
Loss = 1.0631e-01, PNorm = 79.5092, GNorm = 0.4851, lr_0 = 3.5723e-04
Loss = 1.2620e-01, PNorm = 79.5246, GNorm = 1.0069, lr_0 = 3.5698e-04
Loss = 1.1058e-01, PNorm = 79.5350, GNorm = 1.0529, lr_0 = 3.5674e-04
Loss = 1.0780e-01, PNorm = 79.5442, GNorm = 0.7039, lr_0 = 3.5650e-04
Loss = 1.0411e-01, PNorm = 79.5526, GNorm = 0.9026, lr_0 = 3.5625e-04
Loss = 1.1821e-01, PNorm = 79.5594, GNorm = 0.5939, lr_0 = 3.5601e-04
Loss = 1.1025e-01, PNorm = 79.5671, GNorm = 0.6851, lr_0 = 3.5576e-04
Loss = 1.0692e-01, PNorm = 79.5782, GNorm = 0.5035, lr_0 = 3.5552e-04
Loss = 1.0799e-01, PNorm = 79.5927, GNorm = 0.6492, lr_0 = 3.5528e-04
Loss = 1.1146e-01, PNorm = 79.5995, GNorm = 0.7474, lr_0 = 3.5503e-04
Loss = 1.1320e-01, PNorm = 79.6055, GNorm = 0.4851, lr_0 = 3.5479e-04
Loss = 1.1292e-01, PNorm = 79.6143, GNorm = 0.5196, lr_0 = 3.5455e-04
Loss = 1.1168e-01, PNorm = 79.6272, GNorm = 0.5862, lr_0 = 3.5430e-04
Loss = 1.2157e-01, PNorm = 79.6403, GNorm = 0.4943, lr_0 = 3.5406e-04
Loss = 1.1005e-01, PNorm = 79.6497, GNorm = 0.7641, lr_0 = 3.5382e-04
Loss = 1.0357e-01, PNorm = 79.6596, GNorm = 0.6446, lr_0 = 3.5358e-04
Loss = 1.0977e-01, PNorm = 79.6671, GNorm = 0.4864, lr_0 = 3.5333e-04
Loss = 1.1662e-01, PNorm = 79.6781, GNorm = 0.7736, lr_0 = 3.5309e-04
Loss = 1.2051e-01, PNorm = 79.6849, GNorm = 0.8270, lr_0 = 3.5285e-04
Loss = 1.3416e-01, PNorm = 79.6947, GNorm = 0.7736, lr_0 = 3.5261e-04
Loss = 9.5976e-02, PNorm = 79.7001, GNorm = 0.5472, lr_0 = 3.5237e-04
Loss = 1.1093e-01, PNorm = 79.7085, GNorm = 0.6652, lr_0 = 3.5212e-04
Loss = 9.6163e-02, PNorm = 79.7173, GNorm = 0.6685, lr_0 = 3.5188e-04
Loss = 1.1453e-01, PNorm = 79.7231, GNorm = 0.5483, lr_0 = 3.5164e-04
Loss = 1.2055e-01, PNorm = 79.7290, GNorm = 0.5849, lr_0 = 3.5140e-04
Loss = 1.2676e-01, PNorm = 79.7385, GNorm = 1.0961, lr_0 = 3.5116e-04
Loss = 1.2455e-01, PNorm = 79.7497, GNorm = 0.6821, lr_0 = 3.5092e-04
Loss = 1.1510e-01, PNorm = 79.7626, GNorm = 0.6334, lr_0 = 3.5068e-04
Loss = 1.1478e-01, PNorm = 79.7755, GNorm = 1.2133, lr_0 = 3.5044e-04
Loss = 1.2250e-01, PNorm = 79.7861, GNorm = 0.8554, lr_0 = 3.5020e-04
Loss = 1.1469e-01, PNorm = 79.7961, GNorm = 0.9007, lr_0 = 3.4996e-04
Loss = 1.1523e-01, PNorm = 79.8018, GNorm = 0.6130, lr_0 = 3.4972e-04
Loss = 1.1856e-01, PNorm = 79.8068, GNorm = 0.7108, lr_0 = 3.4948e-04
Loss = 1.1725e-01, PNorm = 79.8162, GNorm = 0.5713, lr_0 = 3.4924e-04
Loss = 1.1556e-01, PNorm = 79.8274, GNorm = 0.5178, lr_0 = 3.4900e-04
Loss = 1.0002e-01, PNorm = 79.8334, GNorm = 0.4865, lr_0 = 3.4876e-04
Loss = 1.0902e-01, PNorm = 79.8381, GNorm = 0.6788, lr_0 = 3.4852e-04
Loss = 9.9576e-02, PNorm = 79.8436, GNorm = 0.7004, lr_0 = 3.4828e-04
Loss = 1.1894e-01, PNorm = 79.8504, GNorm = 0.8476, lr_0 = 3.4805e-04
Loss = 1.0859e-01, PNorm = 79.8538, GNorm = 0.5735, lr_0 = 3.4781e-04
Loss = 1.1731e-01, PNorm = 79.8602, GNorm = 0.5390, lr_0 = 3.4757e-04
Loss = 9.7956e-02, PNorm = 79.8657, GNorm = 0.4929, lr_0 = 3.4733e-04
Loss = 1.2103e-01, PNorm = 79.8717, GNorm = 0.8586, lr_0 = 3.4709e-04
Loss = 1.1529e-01, PNorm = 79.8764, GNorm = 0.8228, lr_0 = 3.4686e-04
Loss = 1.2087e-01, PNorm = 79.8861, GNorm = 1.3355, lr_0 = 3.4662e-04
Loss = 1.1632e-01, PNorm = 79.8935, GNorm = 0.6865, lr_0 = 3.4638e-04
Loss = 1.2069e-01, PNorm = 79.9026, GNorm = 0.7642, lr_0 = 3.4614e-04
Loss = 9.6087e-02, PNorm = 79.9092, GNorm = 0.5093, lr_0 = 3.4591e-04
Loss = 1.1353e-01, PNorm = 79.9209, GNorm = 0.5320, lr_0 = 3.4567e-04
Loss = 1.1828e-01, PNorm = 79.9303, GNorm = 0.6201, lr_0 = 3.4543e-04
Loss = 1.1448e-01, PNorm = 79.9358, GNorm = 0.7351, lr_0 = 3.4520e-04
Loss = 1.1292e-01, PNorm = 79.9455, GNorm = 0.8655, lr_0 = 3.4496e-04
Loss = 1.0265e-01, PNorm = 79.9500, GNorm = 0.6736, lr_0 = 3.4472e-04
Loss = 1.1927e-01, PNorm = 79.9610, GNorm = 0.7507, lr_0 = 3.4449e-04
Loss = 1.0244e-01, PNorm = 79.9673, GNorm = 0.8110, lr_0 = 3.4425e-04
Loss = 1.0582e-01, PNorm = 79.9731, GNorm = 0.5139, lr_0 = 3.4402e-04
Loss = 1.0279e-01, PNorm = 79.9814, GNorm = 0.7226, lr_0 = 3.4378e-04
Loss = 9.6244e-02, PNorm = 79.9883, GNorm = 0.6171, lr_0 = 3.4354e-04
Loss = 1.0600e-01, PNorm = 79.9979, GNorm = 0.7433, lr_0 = 3.4331e-04
Validation mae = 0.231346
Epoch 15
Loss = 1.2086e-01, PNorm = 80.0086, GNorm = 0.8576, lr_0 = 3.4307e-04
Loss = 9.6341e-02, PNorm = 80.0187, GNorm = 1.1060, lr_0 = 3.4284e-04
Loss = 9.4086e-02, PNorm = 80.0266, GNorm = 0.5710, lr_0 = 3.4260e-04
Loss = 1.1163e-01, PNorm = 80.0354, GNorm = 0.9079, lr_0 = 3.4237e-04
Loss = 1.0945e-01, PNorm = 80.0440, GNorm = 0.8864, lr_0 = 3.4213e-04
Loss = 1.0472e-01, PNorm = 80.0523, GNorm = 0.4591, lr_0 = 3.4190e-04
Loss = 9.9442e-02, PNorm = 80.0582, GNorm = 0.7259, lr_0 = 3.4167e-04
Loss = 1.3195e-01, PNorm = 80.0641, GNorm = 0.5763, lr_0 = 3.4143e-04
Loss = 1.0367e-01, PNorm = 80.0718, GNorm = 0.7502, lr_0 = 3.4120e-04
Loss = 9.5634e-02, PNorm = 80.0789, GNorm = 0.5753, lr_0 = 3.4096e-04
Loss = 1.2017e-01, PNorm = 80.0873, GNorm = 0.7129, lr_0 = 3.4073e-04
Loss = 9.8486e-02, PNorm = 80.0923, GNorm = 0.7873, lr_0 = 3.4050e-04
Loss = 1.0504e-01, PNorm = 80.1028, GNorm = 0.7396, lr_0 = 3.4026e-04
Loss = 9.9200e-02, PNorm = 80.1113, GNorm = 0.8490, lr_0 = 3.4003e-04
Loss = 1.1423e-01, PNorm = 80.1181, GNorm = 0.6190, lr_0 = 3.3980e-04
Loss = 1.1685e-01, PNorm = 80.1290, GNorm = 1.0558, lr_0 = 3.3956e-04
Loss = 1.0824e-01, PNorm = 80.1403, GNorm = 0.8038, lr_0 = 3.3933e-04
Loss = 1.0489e-01, PNorm = 80.1448, GNorm = 0.5931, lr_0 = 3.3910e-04
Loss = 1.0323e-01, PNorm = 80.1519, GNorm = 0.4975, lr_0 = 3.3887e-04
Loss = 1.1029e-01, PNorm = 80.1571, GNorm = 0.5366, lr_0 = 3.3864e-04
Loss = 1.2346e-01, PNorm = 80.1681, GNorm = 0.7060, lr_0 = 3.3840e-04
Loss = 9.3008e-02, PNorm = 80.1749, GNorm = 0.7530, lr_0 = 3.3817e-04
Loss = 1.1131e-01, PNorm = 80.1805, GNorm = 0.7217, lr_0 = 3.3794e-04
Loss = 1.0120e-01, PNorm = 80.1859, GNorm = 0.7179, lr_0 = 3.3771e-04
Loss = 1.0497e-01, PNorm = 80.1951, GNorm = 0.5768, lr_0 = 3.3748e-04
Loss = 1.0955e-01, PNorm = 80.2037, GNorm = 0.7187, lr_0 = 3.3725e-04
Loss = 1.0127e-01, PNorm = 80.2119, GNorm = 0.6200, lr_0 = 3.3701e-04
Loss = 1.1060e-01, PNorm = 80.2222, GNorm = 0.5948, lr_0 = 3.3678e-04
Loss = 1.0648e-01, PNorm = 80.2321, GNorm = 0.6229, lr_0 = 3.3655e-04
Loss = 1.2601e-01, PNorm = 80.2408, GNorm = 0.9144, lr_0 = 3.3632e-04
Loss = 9.9042e-02, PNorm = 80.2543, GNorm = 0.7142, lr_0 = 3.3609e-04
Loss = 1.0012e-01, PNorm = 80.2656, GNorm = 0.7264, lr_0 = 3.3586e-04
Loss = 1.1381e-01, PNorm = 80.2737, GNorm = 0.5770, lr_0 = 3.3563e-04
Loss = 9.1329e-02, PNorm = 80.2801, GNorm = 0.5171, lr_0 = 3.3540e-04
Loss = 1.1541e-01, PNorm = 80.2865, GNorm = 0.4710, lr_0 = 3.3517e-04
Loss = 1.0427e-01, PNorm = 80.2909, GNorm = 0.9283, lr_0 = 3.3494e-04
Loss = 1.1787e-01, PNorm = 80.2923, GNorm = 0.8014, lr_0 = 3.3471e-04
Loss = 9.7049e-02, PNorm = 80.3014, GNorm = 0.5575, lr_0 = 3.3448e-04
Loss = 1.0184e-01, PNorm = 80.3100, GNorm = 0.7127, lr_0 = 3.3425e-04
Loss = 9.8694e-02, PNorm = 80.3221, GNorm = 0.5864, lr_0 = 3.3403e-04
Loss = 1.0736e-01, PNorm = 80.3343, GNorm = 0.7044, lr_0 = 3.3380e-04
Loss = 1.1843e-01, PNorm = 80.3424, GNorm = 0.7773, lr_0 = 3.3357e-04
Loss = 1.0734e-01, PNorm = 80.3469, GNorm = 1.0586, lr_0 = 3.3334e-04
Loss = 1.0756e-01, PNorm = 80.3562, GNorm = 0.5324, lr_0 = 3.3311e-04
Loss = 1.1062e-01, PNorm = 80.3655, GNorm = 0.5085, lr_0 = 3.3288e-04
Loss = 1.0112e-01, PNorm = 80.3714, GNorm = 0.6156, lr_0 = 3.3265e-04
Loss = 9.4051e-02, PNorm = 80.3826, GNorm = 0.8211, lr_0 = 3.3243e-04
Loss = 9.9282e-02, PNorm = 80.3907, GNorm = 0.5651, lr_0 = 3.3220e-04
Loss = 1.0421e-01, PNorm = 80.4044, GNorm = 0.4805, lr_0 = 3.3197e-04
Loss = 1.2118e-01, PNorm = 80.4165, GNorm = 0.7016, lr_0 = 3.3174e-04
Loss = 1.1762e-01, PNorm = 80.4243, GNorm = 0.7820, lr_0 = 3.3152e-04
Loss = 1.0207e-01, PNorm = 80.4320, GNorm = 0.5234, lr_0 = 3.3129e-04
Loss = 9.1256e-02, PNorm = 80.4400, GNorm = 0.9198, lr_0 = 3.3106e-04
Loss = 9.9848e-02, PNorm = 80.4446, GNorm = 0.6564, lr_0 = 3.3084e-04
Loss = 1.0479e-01, PNorm = 80.4554, GNorm = 0.5097, lr_0 = 3.3061e-04
Loss = 1.0500e-01, PNorm = 80.4634, GNorm = 0.6088, lr_0 = 3.3038e-04
Loss = 1.0318e-01, PNorm = 80.4716, GNorm = 0.8469, lr_0 = 3.3016e-04
Loss = 1.0885e-01, PNorm = 80.4786, GNorm = 0.7662, lr_0 = 3.2993e-04
Loss = 1.1934e-01, PNorm = 80.4870, GNorm = 1.0311, lr_0 = 3.2970e-04
Loss = 1.2287e-01, PNorm = 80.4910, GNorm = 0.6787, lr_0 = 3.2948e-04
Loss = 1.2032e-01, PNorm = 80.5020, GNorm = 0.5892, lr_0 = 3.2925e-04
Loss = 1.0426e-01, PNorm = 80.5087, GNorm = 0.5561, lr_0 = 3.2903e-04
Loss = 1.1032e-01, PNorm = 80.5191, GNorm = 0.7089, lr_0 = 3.2880e-04
Loss = 1.1498e-01, PNorm = 80.5232, GNorm = 0.6157, lr_0 = 3.2858e-04
Loss = 1.1874e-01, PNorm = 80.5320, GNorm = 0.6284, lr_0 = 3.2835e-04
Loss = 1.0756e-01, PNorm = 80.5399, GNorm = 0.7541, lr_0 = 3.2813e-04
Loss = 1.2211e-01, PNorm = 80.5483, GNorm = 0.8245, lr_0 = 3.2790e-04
Loss = 1.1252e-01, PNorm = 80.5566, GNorm = 0.5757, lr_0 = 3.2768e-04
Loss = 1.0618e-01, PNorm = 80.5649, GNorm = 0.6718, lr_0 = 3.2745e-04
Loss = 1.0667e-01, PNorm = 80.5722, GNorm = 0.6781, lr_0 = 3.2723e-04
Loss = 1.0331e-01, PNorm = 80.5795, GNorm = 0.7126, lr_0 = 3.2700e-04
Loss = 1.0537e-01, PNorm = 80.5843, GNorm = 0.5761, lr_0 = 3.2678e-04
Loss = 9.9219e-02, PNorm = 80.5918, GNorm = 0.6107, lr_0 = 3.2656e-04
Loss = 1.1839e-01, PNorm = 80.5966, GNorm = 0.6471, lr_0 = 3.2633e-04
Loss = 1.0977e-01, PNorm = 80.6057, GNorm = 0.6389, lr_0 = 3.2611e-04
Loss = 1.0111e-01, PNorm = 80.6132, GNorm = 0.5052, lr_0 = 3.2589e-04
Loss = 1.0253e-01, PNorm = 80.6196, GNorm = 0.9035, lr_0 = 3.2566e-04
Loss = 9.8858e-02, PNorm = 80.6273, GNorm = 0.6199, lr_0 = 3.2544e-04
Loss = 1.0559e-01, PNorm = 80.6328, GNorm = 0.8102, lr_0 = 3.2522e-04
Loss = 9.7864e-02, PNorm = 80.6402, GNorm = 0.4800, lr_0 = 3.2499e-04
Loss = 1.0051e-01, PNorm = 80.6496, GNorm = 0.8395, lr_0 = 3.2477e-04
Loss = 1.0974e-01, PNorm = 80.6559, GNorm = 0.5181, lr_0 = 3.2455e-04
Loss = 1.0235e-01, PNorm = 80.6641, GNorm = 0.5488, lr_0 = 3.2433e-04
Loss = 1.0731e-01, PNorm = 80.6713, GNorm = 0.4897, lr_0 = 3.2410e-04
Loss = 1.1167e-01, PNorm = 80.6765, GNorm = 0.6245, lr_0 = 3.2388e-04
Loss = 1.1698e-01, PNorm = 80.6831, GNorm = 0.7536, lr_0 = 3.2366e-04
Loss = 9.6119e-02, PNorm = 80.6883, GNorm = 0.7355, lr_0 = 3.2344e-04
Loss = 1.1409e-01, PNorm = 80.6933, GNorm = 0.8309, lr_0 = 3.2322e-04
Loss = 1.1185e-01, PNorm = 80.6965, GNorm = 0.7043, lr_0 = 3.2300e-04
Loss = 1.2294e-01, PNorm = 80.7068, GNorm = 0.6222, lr_0 = 3.2277e-04
Loss = 1.1153e-01, PNorm = 80.7164, GNorm = 0.6368, lr_0 = 3.2255e-04
Loss = 1.0251e-01, PNorm = 80.7231, GNorm = 0.5507, lr_0 = 3.2233e-04
Loss = 9.8075e-02, PNorm = 80.7301, GNorm = 0.6063, lr_0 = 3.2211e-04
Loss = 1.0611e-01, PNorm = 80.7354, GNorm = 0.6008, lr_0 = 3.2189e-04
Loss = 9.9746e-02, PNorm = 80.7435, GNorm = 0.5186, lr_0 = 3.2167e-04
Loss = 1.1526e-01, PNorm = 80.7500, GNorm = 0.6669, lr_0 = 3.2145e-04
Loss = 1.0317e-01, PNorm = 80.7558, GNorm = 0.5890, lr_0 = 3.2123e-04
Loss = 1.1360e-01, PNorm = 80.7574, GNorm = 0.8935, lr_0 = 3.2101e-04
Loss = 1.1492e-01, PNorm = 80.7603, GNorm = 0.6185, lr_0 = 3.2079e-04
Loss = 1.0032e-01, PNorm = 80.7626, GNorm = 0.5843, lr_0 = 3.2057e-04
Loss = 1.0780e-01, PNorm = 80.7704, GNorm = 0.7440, lr_0 = 3.2035e-04
Loss = 1.1246e-01, PNorm = 80.7812, GNorm = 1.0463, lr_0 = 3.2013e-04
Loss = 9.5286e-02, PNorm = 80.7893, GNorm = 0.5756, lr_0 = 3.1991e-04
Loss = 1.2628e-01, PNorm = 80.7948, GNorm = 0.7358, lr_0 = 3.1969e-04
Loss = 9.6468e-02, PNorm = 80.8037, GNorm = 0.6146, lr_0 = 3.1947e-04
Loss = 1.1177e-01, PNorm = 80.8120, GNorm = 0.8553, lr_0 = 3.1925e-04
Loss = 1.1282e-01, PNorm = 80.8166, GNorm = 0.8281, lr_0 = 3.1904e-04
Loss = 1.0290e-01, PNorm = 80.8219, GNorm = 0.7557, lr_0 = 3.1882e-04
Loss = 9.2212e-02, PNorm = 80.8292, GNorm = 0.8984, lr_0 = 3.1860e-04
Loss = 1.1167e-01, PNorm = 80.8350, GNorm = 0.5506, lr_0 = 3.1838e-04
Loss = 1.0545e-01, PNorm = 80.8457, GNorm = 0.4867, lr_0 = 3.1816e-04
Loss = 1.1885e-01, PNorm = 80.8495, GNorm = 0.6694, lr_0 = 3.1794e-04
Loss = 1.0719e-01, PNorm = 80.8555, GNorm = 0.8245, lr_0 = 3.1773e-04
Loss = 1.1436e-01, PNorm = 80.8588, GNorm = 0.7917, lr_0 = 3.1751e-04
Loss = 1.0426e-01, PNorm = 80.8647, GNorm = 0.5282, lr_0 = 3.1729e-04
Loss = 1.1006e-01, PNorm = 80.8714, GNorm = 0.7775, lr_0 = 3.1707e-04
Loss = 1.0744e-01, PNorm = 80.8795, GNorm = 0.6754, lr_0 = 3.1686e-04
Loss = 1.0752e-01, PNorm = 80.8857, GNorm = 0.6013, lr_0 = 3.1664e-04
Loss = 1.0471e-01, PNorm = 80.8929, GNorm = 0.7224, lr_0 = 3.1642e-04
Loss = 1.0345e-01, PNorm = 80.8984, GNorm = 0.6135, lr_0 = 3.1621e-04
Validation mae = 0.228837
Epoch 16
Loss = 9.6297e-02, PNorm = 80.9061, GNorm = 0.6720, lr_0 = 3.1599e-04
Loss = 1.0558e-01, PNorm = 80.9127, GNorm = 0.5745, lr_0 = 3.1577e-04
Loss = 1.0796e-01, PNorm = 80.9189, GNorm = 0.7220, lr_0 = 3.1556e-04
Loss = 9.6759e-02, PNorm = 80.9250, GNorm = 0.6018, lr_0 = 3.1534e-04
Loss = 1.0631e-01, PNorm = 80.9311, GNorm = 0.8006, lr_0 = 3.1512e-04
Loss = 1.0476e-01, PNorm = 80.9390, GNorm = 0.7378, lr_0 = 3.1491e-04
Loss = 9.8205e-02, PNorm = 80.9458, GNorm = 0.5825, lr_0 = 3.1469e-04
Loss = 1.0115e-01, PNorm = 80.9542, GNorm = 0.8102, lr_0 = 3.1448e-04
Loss = 9.1341e-02, PNorm = 80.9606, GNorm = 0.8553, lr_0 = 3.1426e-04
Loss = 1.0586e-01, PNorm = 80.9629, GNorm = 0.8588, lr_0 = 3.1405e-04
Loss = 1.0398e-01, PNorm = 80.9707, GNorm = 0.5645, lr_0 = 3.1383e-04
Loss = 1.1031e-01, PNorm = 80.9778, GNorm = 0.7302, lr_0 = 3.1362e-04
Loss = 1.1098e-01, PNorm = 80.9892, GNorm = 0.9848, lr_0 = 3.1340e-04
Loss = 9.1628e-02, PNorm = 80.9962, GNorm = 0.7478, lr_0 = 3.1319e-04
Loss = 9.8046e-02, PNorm = 80.9995, GNorm = 0.9542, lr_0 = 3.1297e-04
Loss = 1.2740e-01, PNorm = 81.0072, GNorm = 0.7482, lr_0 = 3.1276e-04
Loss = 1.0513e-01, PNorm = 81.0143, GNorm = 0.7956, lr_0 = 3.1254e-04
Loss = 1.0232e-01, PNorm = 81.0235, GNorm = 0.6243, lr_0 = 3.1233e-04
Loss = 1.1106e-01, PNorm = 81.0335, GNorm = 0.7926, lr_0 = 3.1212e-04
Loss = 9.7111e-02, PNorm = 81.0422, GNorm = 0.6071, lr_0 = 3.1190e-04
Loss = 1.0291e-01, PNorm = 81.0508, GNorm = 0.9051, lr_0 = 3.1169e-04
Loss = 9.9872e-02, PNorm = 81.0579, GNorm = 0.6442, lr_0 = 3.1147e-04
Loss = 9.5680e-02, PNorm = 81.0664, GNorm = 0.7911, lr_0 = 3.1126e-04
Loss = 1.0552e-01, PNorm = 81.0691, GNorm = 0.5075, lr_0 = 3.1105e-04
Loss = 1.1683e-01, PNorm = 81.0779, GNorm = 0.6916, lr_0 = 3.1083e-04
Loss = 9.6560e-02, PNorm = 81.0864, GNorm = 0.7712, lr_0 = 3.1062e-04
Loss = 1.0383e-01, PNorm = 81.0945, GNorm = 0.6181, lr_0 = 3.1041e-04
Loss = 1.0036e-01, PNorm = 81.1023, GNorm = 0.5174, lr_0 = 3.1020e-04
Loss = 1.0624e-01, PNorm = 81.1135, GNorm = 0.6077, lr_0 = 3.0998e-04
Loss = 1.1470e-01, PNorm = 81.1253, GNorm = 0.5452, lr_0 = 3.0977e-04
Loss = 1.0479e-01, PNorm = 81.1355, GNorm = 0.7687, lr_0 = 3.0956e-04
Loss = 9.6812e-02, PNorm = 81.1425, GNorm = 0.8289, lr_0 = 3.0935e-04
Loss = 1.0581e-01, PNorm = 81.1435, GNorm = 0.6036, lr_0 = 3.0914e-04
Loss = 1.0600e-01, PNorm = 81.1481, GNorm = 0.5063, lr_0 = 3.0892e-04
Loss = 1.0222e-01, PNorm = 81.1548, GNorm = 0.6894, lr_0 = 3.0871e-04
Loss = 9.0999e-02, PNorm = 81.1631, GNorm = 0.6616, lr_0 = 3.0850e-04
Loss = 9.5605e-02, PNorm = 81.1716, GNorm = 0.7052, lr_0 = 3.0829e-04
Loss = 1.0653e-01, PNorm = 81.1797, GNorm = 0.5703, lr_0 = 3.0808e-04
Loss = 1.0634e-01, PNorm = 81.1868, GNorm = 0.8392, lr_0 = 3.0787e-04
Loss = 9.6356e-02, PNorm = 81.1935, GNorm = 0.5784, lr_0 = 3.0766e-04
Loss = 1.0352e-01, PNorm = 81.2002, GNorm = 0.8934, lr_0 = 3.0745e-04
Loss = 1.1008e-01, PNorm = 81.2109, GNorm = 0.6921, lr_0 = 3.0723e-04
Loss = 1.0116e-01, PNorm = 81.2169, GNorm = 0.5227, lr_0 = 3.0702e-04
Loss = 9.1567e-02, PNorm = 81.2206, GNorm = 0.9829, lr_0 = 3.0681e-04
Loss = 1.0323e-01, PNorm = 81.2276, GNorm = 0.5403, lr_0 = 3.0660e-04
Loss = 9.6572e-02, PNorm = 81.2336, GNorm = 0.7211, lr_0 = 3.0639e-04
Loss = 1.1423e-01, PNorm = 81.2405, GNorm = 1.5464, lr_0 = 3.0618e-04
Loss = 1.1544e-01, PNorm = 81.2476, GNorm = 0.6829, lr_0 = 3.0597e-04
Loss = 1.0412e-01, PNorm = 81.2519, GNorm = 0.4808, lr_0 = 3.0576e-04
Loss = 1.0030e-01, PNorm = 81.2620, GNorm = 0.9767, lr_0 = 3.0555e-04
Loss = 9.5474e-02, PNorm = 81.2667, GNorm = 0.6642, lr_0 = 3.0535e-04
Loss = 1.0461e-01, PNorm = 81.2731, GNorm = 0.4900, lr_0 = 3.0514e-04
Loss = 1.0144e-01, PNorm = 81.2818, GNorm = 0.9470, lr_0 = 3.0493e-04
Loss = 1.0700e-01, PNorm = 81.2896, GNorm = 0.5179, lr_0 = 3.0472e-04
Loss = 1.0187e-01, PNorm = 81.2959, GNorm = 0.5892, lr_0 = 3.0451e-04
Loss = 9.2460e-02, PNorm = 81.3022, GNorm = 0.5516, lr_0 = 3.0430e-04
Loss = 1.1814e-01, PNorm = 81.3077, GNorm = 0.5631, lr_0 = 3.0409e-04
Loss = 1.0459e-01, PNorm = 81.3131, GNorm = 0.5322, lr_0 = 3.0388e-04
Loss = 1.1082e-01, PNorm = 81.3193, GNorm = 1.0821, lr_0 = 3.0368e-04
Loss = 1.1087e-01, PNorm = 81.3302, GNorm = 0.5881, lr_0 = 3.0347e-04
Loss = 1.1039e-01, PNorm = 81.3387, GNorm = 0.5758, lr_0 = 3.0326e-04
Loss = 9.4803e-02, PNorm = 81.3475, GNorm = 0.9523, lr_0 = 3.0305e-04
Loss = 1.2095e-01, PNorm = 81.3597, GNorm = 0.7818, lr_0 = 3.0284e-04
Loss = 1.0878e-01, PNorm = 81.3646, GNorm = 0.7559, lr_0 = 3.0264e-04
Loss = 1.1474e-01, PNorm = 81.3731, GNorm = 0.8544, lr_0 = 3.0243e-04
Loss = 1.1277e-01, PNorm = 81.3775, GNorm = 0.6648, lr_0 = 3.0222e-04
Loss = 1.0472e-01, PNorm = 81.3834, GNorm = 0.7953, lr_0 = 3.0202e-04
Loss = 1.0909e-01, PNorm = 81.3906, GNorm = 0.6893, lr_0 = 3.0181e-04
Loss = 1.0009e-01, PNorm = 81.3977, GNorm = 0.6673, lr_0 = 3.0160e-04
Loss = 9.1514e-02, PNorm = 81.4028, GNorm = 0.5222, lr_0 = 3.0140e-04
Loss = 1.1256e-01, PNorm = 81.4071, GNorm = 0.6494, lr_0 = 3.0119e-04
Loss = 9.9411e-02, PNorm = 81.4126, GNorm = 0.5126, lr_0 = 3.0098e-04
Loss = 1.1155e-01, PNorm = 81.4214, GNorm = 0.9455, lr_0 = 3.0078e-04
Loss = 9.7661e-02, PNorm = 81.4297, GNorm = 0.5905, lr_0 = 3.0057e-04
Loss = 1.0436e-01, PNorm = 81.4353, GNorm = 0.6272, lr_0 = 3.0036e-04
Loss = 1.0526e-01, PNorm = 81.4417, GNorm = 0.5454, lr_0 = 3.0016e-04
Loss = 9.3054e-02, PNorm = 81.4470, GNorm = 0.5848, lr_0 = 2.9995e-04
Loss = 1.1118e-01, PNorm = 81.4513, GNorm = 0.8510, lr_0 = 2.9975e-04
Loss = 9.8429e-02, PNorm = 81.4557, GNorm = 0.6220, lr_0 = 2.9954e-04
Loss = 1.0080e-01, PNorm = 81.4583, GNorm = 0.7905, lr_0 = 2.9934e-04
Loss = 1.2025e-01, PNorm = 81.4636, GNorm = 0.8774, lr_0 = 2.9913e-04
Loss = 1.2133e-01, PNorm = 81.4692, GNorm = 1.0166, lr_0 = 2.9893e-04
Loss = 1.0286e-01, PNorm = 81.4792, GNorm = 0.6837, lr_0 = 2.9872e-04
Loss = 1.1197e-01, PNorm = 81.4896, GNorm = 0.6107, lr_0 = 2.9852e-04
Loss = 9.9573e-02, PNorm = 81.4954, GNorm = 0.9011, lr_0 = 2.9831e-04
Loss = 1.0152e-01, PNorm = 81.4991, GNorm = 0.7941, lr_0 = 2.9811e-04
Loss = 1.0350e-01, PNorm = 81.5048, GNorm = 1.0156, lr_0 = 2.9790e-04
Loss = 1.1303e-01, PNorm = 81.5116, GNorm = 0.6514, lr_0 = 2.9770e-04
Loss = 1.0005e-01, PNorm = 81.5183, GNorm = 0.4495, lr_0 = 2.9750e-04
Loss = 1.2283e-01, PNorm = 81.5261, GNorm = 0.6736, lr_0 = 2.9729e-04
Loss = 1.0007e-01, PNorm = 81.5346, GNorm = 0.6645, lr_0 = 2.9709e-04
Loss = 1.0675e-01, PNorm = 81.5446, GNorm = 0.6945, lr_0 = 2.9689e-04
Loss = 1.0800e-01, PNorm = 81.5494, GNorm = 0.4796, lr_0 = 2.9668e-04
Loss = 1.0370e-01, PNorm = 81.5546, GNorm = 0.4833, lr_0 = 2.9648e-04
Loss = 1.0978e-01, PNorm = 81.5626, GNorm = 1.1122, lr_0 = 2.9628e-04
Loss = 9.3828e-02, PNorm = 81.5690, GNorm = 0.6311, lr_0 = 2.9607e-04
Loss = 9.9642e-02, PNorm = 81.5770, GNorm = 0.6416, lr_0 = 2.9587e-04
Loss = 9.8243e-02, PNorm = 81.5794, GNorm = 0.7227, lr_0 = 2.9567e-04
Loss = 9.8649e-02, PNorm = 81.5854, GNorm = 0.4803, lr_0 = 2.9546e-04
Loss = 9.6954e-02, PNorm = 81.5920, GNorm = 0.7068, lr_0 = 2.9526e-04
Loss = 9.9313e-02, PNorm = 81.5978, GNorm = 0.6207, lr_0 = 2.9506e-04
Loss = 1.0607e-01, PNorm = 81.6050, GNorm = 0.8590, lr_0 = 2.9486e-04
Loss = 1.1036e-01, PNorm = 81.6102, GNorm = 0.8329, lr_0 = 2.9466e-04
Loss = 1.1729e-01, PNorm = 81.6148, GNorm = 0.7327, lr_0 = 2.9445e-04
Loss = 9.4193e-02, PNorm = 81.6194, GNorm = 0.6679, lr_0 = 2.9425e-04
Loss = 1.1490e-01, PNorm = 81.6240, GNorm = 0.6374, lr_0 = 2.9405e-04
Loss = 1.3434e-01, PNorm = 81.6323, GNorm = 0.8009, lr_0 = 2.9385e-04
Loss = 1.0982e-01, PNorm = 81.6418, GNorm = 0.9100, lr_0 = 2.9365e-04
Loss = 1.0436e-01, PNorm = 81.6454, GNorm = 0.6166, lr_0 = 2.9345e-04
Loss = 8.8902e-02, PNorm = 81.6513, GNorm = 0.6629, lr_0 = 2.9325e-04
Loss = 1.0429e-01, PNorm = 81.6572, GNorm = 0.6645, lr_0 = 2.9305e-04
Loss = 1.1078e-01, PNorm = 81.6620, GNorm = 0.5667, lr_0 = 2.9284e-04
Loss = 9.1150e-02, PNorm = 81.6699, GNorm = 0.4991, lr_0 = 2.9264e-04
Loss = 1.1312e-01, PNorm = 81.6788, GNorm = 0.7963, lr_0 = 2.9244e-04
Loss = 1.0095e-01, PNorm = 81.6860, GNorm = 0.5749, lr_0 = 2.9224e-04
Loss = 9.3447e-02, PNorm = 81.6919, GNorm = 0.7212, lr_0 = 2.9204e-04
Loss = 1.0602e-01, PNorm = 81.6972, GNorm = 0.5580, lr_0 = 2.9184e-04
Loss = 9.4744e-02, PNorm = 81.7040, GNorm = 0.7428, lr_0 = 2.9164e-04
Loss = 1.1885e-01, PNorm = 81.7092, GNorm = 0.9839, lr_0 = 2.9144e-04
Loss = 1.0914e-01, PNorm = 81.7205, GNorm = 0.6594, lr_0 = 2.9124e-04
Validation mae = 0.227786
Epoch 17
Loss = 1.0801e-01, PNorm = 81.7327, GNorm = 0.6658, lr_0 = 2.9104e-04
Loss = 9.6664e-02, PNorm = 81.7398, GNorm = 0.7373, lr_0 = 2.9084e-04
Loss = 8.6233e-02, PNorm = 81.7447, GNorm = 0.4279, lr_0 = 2.9065e-04
Loss = 8.7520e-02, PNorm = 81.7478, GNorm = 0.4437, lr_0 = 2.9045e-04
Loss = 8.4794e-02, PNorm = 81.7528, GNorm = 0.7386, lr_0 = 2.9025e-04
Loss = 1.0933e-01, PNorm = 81.7596, GNorm = 0.8597, lr_0 = 2.9005e-04
Loss = 1.0816e-01, PNorm = 81.7665, GNorm = 0.5333, lr_0 = 2.8985e-04
Loss = 1.1249e-01, PNorm = 81.7715, GNorm = 0.6000, lr_0 = 2.8965e-04
Loss = 9.9264e-02, PNorm = 81.7798, GNorm = 0.6644, lr_0 = 2.8945e-04
Loss = 9.6888e-02, PNorm = 81.7811, GNorm = 1.0635, lr_0 = 2.8925e-04
Loss = 9.2334e-02, PNorm = 81.7888, GNorm = 0.5538, lr_0 = 2.8906e-04
Loss = 1.0025e-01, PNorm = 81.7941, GNorm = 0.8922, lr_0 = 2.8886e-04
Loss = 9.6859e-02, PNorm = 81.7958, GNorm = 0.6519, lr_0 = 2.8866e-04
Loss = 1.0099e-01, PNorm = 81.8030, GNorm = 0.5085, lr_0 = 2.8846e-04
Loss = 9.8654e-02, PNorm = 81.8094, GNorm = 0.6088, lr_0 = 2.8826e-04
Loss = 9.4364e-02, PNorm = 81.8175, GNorm = 0.5541, lr_0 = 2.8807e-04
Loss = 9.7488e-02, PNorm = 81.8257, GNorm = 1.0944, lr_0 = 2.8787e-04
Loss = 1.0510e-01, PNorm = 81.8331, GNorm = 0.6351, lr_0 = 2.8767e-04
Loss = 1.1052e-01, PNorm = 81.8373, GNorm = 0.6964, lr_0 = 2.8748e-04
Loss = 9.3923e-02, PNorm = 81.8453, GNorm = 0.7263, lr_0 = 2.8728e-04
Loss = 9.6806e-02, PNorm = 81.8516, GNorm = 0.6882, lr_0 = 2.8708e-04
Loss = 1.0166e-01, PNorm = 81.8590, GNorm = 0.8563, lr_0 = 2.8689e-04
Loss = 9.8699e-02, PNorm = 81.8656, GNorm = 0.5331, lr_0 = 2.8669e-04
Loss = 8.0792e-02, PNorm = 81.8754, GNorm = 0.4131, lr_0 = 2.8649e-04
Loss = 1.0458e-01, PNorm = 81.8786, GNorm = 0.5864, lr_0 = 2.8630e-04
Loss = 1.1118e-01, PNorm = 81.8845, GNorm = 0.7068, lr_0 = 2.8610e-04
Loss = 1.0072e-01, PNorm = 81.8893, GNorm = 0.7089, lr_0 = 2.8590e-04
Loss = 1.0939e-01, PNorm = 81.8947, GNorm = 0.8398, lr_0 = 2.8571e-04
Loss = 1.1306e-01, PNorm = 81.9006, GNorm = 0.7936, lr_0 = 2.8551e-04
Loss = 1.0189e-01, PNorm = 81.9087, GNorm = 0.6500, lr_0 = 2.8532e-04
Loss = 1.2071e-01, PNorm = 81.9160, GNorm = 0.6652, lr_0 = 2.8512e-04
Loss = 1.0293e-01, PNorm = 81.9258, GNorm = 0.8251, lr_0 = 2.8493e-04
Loss = 1.0661e-01, PNorm = 81.9312, GNorm = 0.9880, lr_0 = 2.8473e-04
Loss = 1.0218e-01, PNorm = 81.9389, GNorm = 0.7322, lr_0 = 2.8454e-04
Loss = 9.4605e-02, PNorm = 81.9447, GNorm = 0.9855, lr_0 = 2.8434e-04
Loss = 1.1076e-01, PNorm = 81.9505, GNorm = 0.6424, lr_0 = 2.8415e-04
Loss = 9.5023e-02, PNorm = 81.9563, GNorm = 0.5355, lr_0 = 2.8395e-04
Loss = 9.0472e-02, PNorm = 81.9619, GNorm = 0.6143, lr_0 = 2.8376e-04
Loss = 1.0924e-01, PNorm = 81.9686, GNorm = 0.6362, lr_0 = 2.8356e-04
Loss = 9.5884e-02, PNorm = 81.9767, GNorm = 0.6369, lr_0 = 2.8337e-04
Loss = 9.3771e-02, PNorm = 81.9813, GNorm = 0.6889, lr_0 = 2.8317e-04
Loss = 1.0227e-01, PNorm = 81.9876, GNorm = 0.8253, lr_0 = 2.8298e-04
Loss = 1.0846e-01, PNorm = 81.9945, GNorm = 0.6238, lr_0 = 2.8279e-04
Loss = 7.8195e-02, PNorm = 81.9999, GNorm = 0.6503, lr_0 = 2.8259e-04
Loss = 1.0265e-01, PNorm = 82.0050, GNorm = 0.7926, lr_0 = 2.8240e-04
Loss = 1.0802e-01, PNorm = 82.0087, GNorm = 0.7315, lr_0 = 2.8221e-04
Loss = 1.0815e-01, PNorm = 82.0127, GNorm = 0.7140, lr_0 = 2.8201e-04
Loss = 1.0950e-01, PNorm = 82.0216, GNorm = 1.1063, lr_0 = 2.8182e-04
Loss = 1.0290e-01, PNorm = 82.0304, GNorm = 0.8747, lr_0 = 2.8163e-04
Loss = 8.9068e-02, PNorm = 82.0357, GNorm = 0.5505, lr_0 = 2.8143e-04
Loss = 1.0182e-01, PNorm = 82.0405, GNorm = 0.6873, lr_0 = 2.8124e-04
Loss = 1.1118e-01, PNorm = 82.0445, GNorm = 0.7219, lr_0 = 2.8105e-04
Loss = 9.4800e-02, PNorm = 82.0499, GNorm = 0.6561, lr_0 = 2.8085e-04
Loss = 1.0826e-01, PNorm = 82.0587, GNorm = 0.8363, lr_0 = 2.8066e-04
Loss = 9.8851e-02, PNorm = 82.0660, GNorm = 0.7044, lr_0 = 2.8047e-04
Loss = 1.0515e-01, PNorm = 82.0695, GNorm = 0.8984, lr_0 = 2.8028e-04
Loss = 1.0281e-01, PNorm = 82.0748, GNorm = 0.6313, lr_0 = 2.8009e-04
Loss = 9.7630e-02, PNorm = 82.0812, GNorm = 0.7499, lr_0 = 2.7989e-04
Loss = 9.7506e-02, PNorm = 82.0900, GNorm = 0.7423, lr_0 = 2.7970e-04
Loss = 1.0829e-01, PNorm = 82.0979, GNorm = 0.7203, lr_0 = 2.7951e-04
Loss = 9.6744e-02, PNorm = 82.1032, GNorm = 0.5520, lr_0 = 2.7932e-04
Loss = 9.9016e-02, PNorm = 82.1091, GNorm = 0.6700, lr_0 = 2.7913e-04
Loss = 1.0732e-01, PNorm = 82.1136, GNorm = 0.6207, lr_0 = 2.7894e-04
Loss = 9.3230e-02, PNorm = 82.1156, GNorm = 0.5790, lr_0 = 2.7875e-04
Loss = 1.1669e-01, PNorm = 82.1186, GNorm = 0.7641, lr_0 = 2.7855e-04
Loss = 1.0620e-01, PNorm = 82.1250, GNorm = 0.6126, lr_0 = 2.7836e-04
Loss = 1.0230e-01, PNorm = 82.1332, GNorm = 0.6614, lr_0 = 2.7817e-04
Loss = 1.0224e-01, PNorm = 82.1417, GNorm = 0.7172, lr_0 = 2.7798e-04
Loss = 8.1162e-02, PNorm = 82.1491, GNorm = 0.5729, lr_0 = 2.7779e-04
Loss = 1.1359e-01, PNorm = 82.1560, GNorm = 0.8165, lr_0 = 2.7760e-04
Loss = 1.0494e-01, PNorm = 82.1589, GNorm = 0.7674, lr_0 = 2.7741e-04
Loss = 9.9634e-02, PNorm = 82.1640, GNorm = 0.8891, lr_0 = 2.7722e-04
Loss = 9.3232e-02, PNorm = 82.1671, GNorm = 0.6491, lr_0 = 2.7703e-04
Loss = 9.7238e-02, PNorm = 82.1690, GNorm = 0.5788, lr_0 = 2.7684e-04
Loss = 9.2339e-02, PNorm = 82.1717, GNorm = 0.6188, lr_0 = 2.7665e-04
Loss = 9.8060e-02, PNorm = 82.1771, GNorm = 0.7608, lr_0 = 2.7646e-04
Loss = 9.4615e-02, PNorm = 82.1801, GNorm = 0.5830, lr_0 = 2.7627e-04
Loss = 9.4107e-02, PNorm = 82.1827, GNorm = 0.5389, lr_0 = 2.7608e-04
Loss = 1.0514e-01, PNorm = 82.1875, GNorm = 0.5708, lr_0 = 2.7590e-04
Loss = 1.1298e-01, PNorm = 82.1944, GNorm = 1.0503, lr_0 = 2.7571e-04
Loss = 1.0260e-01, PNorm = 82.2036, GNorm = 0.7611, lr_0 = 2.7552e-04
Loss = 9.7095e-02, PNorm = 82.2135, GNorm = 0.7348, lr_0 = 2.7533e-04
Loss = 9.3726e-02, PNorm = 82.2189, GNorm = 0.8939, lr_0 = 2.7514e-04
Loss = 1.0550e-01, PNorm = 82.2245, GNorm = 0.6614, lr_0 = 2.7495e-04
Loss = 1.1798e-01, PNorm = 82.2321, GNorm = 0.6627, lr_0 = 2.7476e-04
Loss = 9.9263e-02, PNorm = 82.2367, GNorm = 0.9178, lr_0 = 2.7457e-04
Loss = 9.5669e-02, PNorm = 82.2449, GNorm = 0.5686, lr_0 = 2.7439e-04
Loss = 1.1081e-01, PNorm = 82.2492, GNorm = 0.4903, lr_0 = 2.7420e-04
Loss = 8.7823e-02, PNorm = 82.2523, GNorm = 0.5595, lr_0 = 2.7401e-04
Loss = 9.3117e-02, PNorm = 82.2582, GNorm = 0.4693, lr_0 = 2.7382e-04
Loss = 8.7148e-02, PNorm = 82.2628, GNorm = 0.5360, lr_0 = 2.7364e-04
Loss = 9.1530e-02, PNorm = 82.2675, GNorm = 0.5766, lr_0 = 2.7345e-04
Loss = 1.0332e-01, PNorm = 82.2749, GNorm = 0.5130, lr_0 = 2.7326e-04
Loss = 9.9830e-02, PNorm = 82.2788, GNorm = 0.6259, lr_0 = 2.7307e-04
Loss = 9.2847e-02, PNorm = 82.2848, GNorm = 0.6577, lr_0 = 2.7289e-04
Loss = 1.1315e-01, PNorm = 82.2925, GNorm = 0.7576, lr_0 = 2.7270e-04
Loss = 9.2805e-02, PNorm = 82.2983, GNorm = 0.6354, lr_0 = 2.7251e-04
Loss = 1.0989e-01, PNorm = 82.3056, GNorm = 0.6840, lr_0 = 2.7233e-04
Loss = 9.5845e-02, PNorm = 82.3136, GNorm = 0.5305, lr_0 = 2.7214e-04
Loss = 9.1273e-02, PNorm = 82.3217, GNorm = 0.5175, lr_0 = 2.7195e-04
Loss = 9.7256e-02, PNorm = 82.3273, GNorm = 0.7289, lr_0 = 2.7177e-04
Loss = 9.9772e-02, PNorm = 82.3326, GNorm = 0.7727, lr_0 = 2.7158e-04
Loss = 9.5955e-02, PNorm = 82.3380, GNorm = 0.5692, lr_0 = 2.7139e-04
Loss = 9.4799e-02, PNorm = 82.3436, GNorm = 0.6836, lr_0 = 2.7121e-04
Loss = 1.0703e-01, PNorm = 82.3481, GNorm = 0.8648, lr_0 = 2.7102e-04
Loss = 1.1032e-01, PNorm = 82.3516, GNorm = 0.7207, lr_0 = 2.7084e-04
Loss = 9.1777e-02, PNorm = 82.3579, GNorm = 0.9781, lr_0 = 2.7065e-04
Loss = 9.6003e-02, PNorm = 82.3644, GNorm = 0.5834, lr_0 = 2.7047e-04
Loss = 9.8222e-02, PNorm = 82.3745, GNorm = 0.9763, lr_0 = 2.7028e-04
Loss = 9.7805e-02, PNorm = 82.3827, GNorm = 0.5562, lr_0 = 2.7010e-04
Loss = 9.8340e-02, PNorm = 82.3871, GNorm = 0.8508, lr_0 = 2.6991e-04
Loss = 1.0064e-01, PNorm = 82.3924, GNorm = 0.5279, lr_0 = 2.6973e-04
Loss = 9.9049e-02, PNorm = 82.3941, GNorm = 0.6570, lr_0 = 2.6954e-04
Loss = 9.9299e-02, PNorm = 82.3983, GNorm = 0.7339, lr_0 = 2.6936e-04
Loss = 9.3727e-02, PNorm = 82.4037, GNorm = 0.6045, lr_0 = 2.6917e-04
Loss = 1.0204e-01, PNorm = 82.4087, GNorm = 0.9119, lr_0 = 2.6899e-04
Loss = 1.0342e-01, PNorm = 82.4111, GNorm = 0.8354, lr_0 = 2.6880e-04
Loss = 1.1250e-01, PNorm = 82.4156, GNorm = 0.7568, lr_0 = 2.6862e-04
Loss = 1.0960e-01, PNorm = 82.4222, GNorm = 1.1680, lr_0 = 2.6844e-04
Loss = 9.7584e-02, PNorm = 82.4240, GNorm = 0.5789, lr_0 = 2.6825e-04
Validation mae = 0.227381
Epoch 18
Loss = 9.9230e-02, PNorm = 82.4278, GNorm = 0.6663, lr_0 = 2.6807e-04
Loss = 8.4847e-02, PNorm = 82.4319, GNorm = 0.4567, lr_0 = 2.6788e-04
Loss = 8.4356e-02, PNorm = 82.4390, GNorm = 0.6547, lr_0 = 2.6770e-04
Loss = 1.1190e-01, PNorm = 82.4484, GNorm = 0.7866, lr_0 = 2.6752e-04
Loss = 8.3307e-02, PNorm = 82.4553, GNorm = 0.5343, lr_0 = 2.6733e-04
Loss = 8.8014e-02, PNorm = 82.4612, GNorm = 0.9304, lr_0 = 2.6715e-04
Loss = 9.6148e-02, PNorm = 82.4675, GNorm = 0.7269, lr_0 = 2.6697e-04
Loss = 9.4509e-02, PNorm = 82.4712, GNorm = 0.6653, lr_0 = 2.6678e-04
Loss = 9.5387e-02, PNorm = 82.4745, GNorm = 0.6382, lr_0 = 2.6660e-04
Loss = 8.9473e-02, PNorm = 82.4802, GNorm = 0.5649, lr_0 = 2.6642e-04
Loss = 8.4152e-02, PNorm = 82.4874, GNorm = 0.9601, lr_0 = 2.6624e-04
Loss = 1.0383e-01, PNorm = 82.4941, GNorm = 0.6396, lr_0 = 2.6605e-04
Loss = 1.0107e-01, PNorm = 82.4997, GNorm = 0.5815, lr_0 = 2.6587e-04
Loss = 1.0912e-01, PNorm = 82.5064, GNorm = 0.7294, lr_0 = 2.6569e-04
Loss = 9.9354e-02, PNorm = 82.5084, GNorm = 0.7528, lr_0 = 2.6551e-04
Loss = 9.5697e-02, PNorm = 82.5107, GNorm = 0.7342, lr_0 = 2.6533e-04
Loss = 9.8368e-02, PNorm = 82.5140, GNorm = 0.5722, lr_0 = 2.6514e-04
Loss = 1.0482e-01, PNorm = 82.5190, GNorm = 0.7288, lr_0 = 2.6496e-04
Loss = 1.0201e-01, PNorm = 82.5252, GNorm = 0.9315, lr_0 = 2.6478e-04
Loss = 8.2086e-02, PNorm = 82.5303, GNorm = 0.8047, lr_0 = 2.6460e-04
Loss = 9.5754e-02, PNorm = 82.5364, GNorm = 0.7032, lr_0 = 2.6442e-04
Loss = 1.0035e-01, PNorm = 82.5411, GNorm = 0.7248, lr_0 = 2.6424e-04
Loss = 8.8595e-02, PNorm = 82.5485, GNorm = 0.6124, lr_0 = 2.6406e-04
Loss = 9.2300e-02, PNorm = 82.5528, GNorm = 0.5901, lr_0 = 2.6388e-04
Loss = 9.2619e-02, PNorm = 82.5633, GNorm = 0.6683, lr_0 = 2.6369e-04
Loss = 8.0904e-02, PNorm = 82.5698, GNorm = 1.0239, lr_0 = 2.6351e-04
Loss = 1.0419e-01, PNorm = 82.5776, GNorm = 0.4703, lr_0 = 2.6333e-04
Loss = 9.3242e-02, PNorm = 82.5867, GNorm = 0.5306, lr_0 = 2.6315e-04
Loss = 9.6388e-02, PNorm = 82.5937, GNorm = 0.9194, lr_0 = 2.6297e-04
Loss = 8.8315e-02, PNorm = 82.6013, GNorm = 0.7998, lr_0 = 2.6279e-04
Loss = 8.6178e-02, PNorm = 82.6081, GNorm = 0.5707, lr_0 = 2.6261e-04
Loss = 1.1985e-01, PNorm = 82.6139, GNorm = 0.5862, lr_0 = 2.6243e-04
Loss = 8.5974e-02, PNorm = 82.6191, GNorm = 0.5396, lr_0 = 2.6225e-04
Loss = 9.5378e-02, PNorm = 82.6256, GNorm = 0.4937, lr_0 = 2.6207e-04
Loss = 8.7124e-02, PNorm = 82.6325, GNorm = 0.5255, lr_0 = 2.6189e-04
Loss = 1.0888e-01, PNorm = 82.6384, GNorm = 0.9585, lr_0 = 2.6171e-04
Loss = 1.0654e-01, PNorm = 82.6446, GNorm = 0.6496, lr_0 = 2.6153e-04
Loss = 1.0546e-01, PNorm = 82.6482, GNorm = 0.7301, lr_0 = 2.6136e-04
Loss = 9.4561e-02, PNorm = 82.6531, GNorm = 0.5314, lr_0 = 2.6118e-04
Loss = 8.9016e-02, PNorm = 82.6568, GNorm = 0.5738, lr_0 = 2.6100e-04
Loss = 1.0076e-01, PNorm = 82.6609, GNorm = 0.7050, lr_0 = 2.6082e-04
Loss = 9.8634e-02, PNorm = 82.6652, GNorm = 0.5092, lr_0 = 2.6064e-04
Loss = 9.1110e-02, PNorm = 82.6669, GNorm = 0.6522, lr_0 = 2.6046e-04
Loss = 8.5412e-02, PNorm = 82.6696, GNorm = 0.5079, lr_0 = 2.6028e-04
Loss = 9.2889e-02, PNorm = 82.6732, GNorm = 0.5198, lr_0 = 2.6011e-04
Loss = 9.2219e-02, PNorm = 82.6762, GNorm = 1.2047, lr_0 = 2.5993e-04
Loss = 1.0566e-01, PNorm = 82.6800, GNorm = 0.9342, lr_0 = 2.5975e-04
Loss = 8.9248e-02, PNorm = 82.6830, GNorm = 0.6337, lr_0 = 2.5957e-04
Loss = 9.6489e-02, PNorm = 82.6856, GNorm = 0.5932, lr_0 = 2.5939e-04
Loss = 1.0224e-01, PNorm = 82.6870, GNorm = 0.7203, lr_0 = 2.5922e-04
Loss = 9.3424e-02, PNorm = 82.6930, GNorm = 0.6339, lr_0 = 2.5904e-04
Loss = 8.9411e-02, PNorm = 82.6968, GNorm = 0.6591, lr_0 = 2.5886e-04
Loss = 9.8022e-02, PNorm = 82.7008, GNorm = 0.9820, lr_0 = 2.5868e-04
Loss = 9.5893e-02, PNorm = 82.7032, GNorm = 0.7701, lr_0 = 2.5851e-04
Loss = 1.0816e-01, PNorm = 82.7092, GNorm = 0.6940, lr_0 = 2.5833e-04
Loss = 1.1100e-01, PNorm = 82.7183, GNorm = 0.6387, lr_0 = 2.5815e-04
Loss = 1.0880e-01, PNorm = 82.7236, GNorm = 0.6355, lr_0 = 2.5797e-04
Loss = 9.1679e-02, PNorm = 82.7328, GNorm = 0.5780, lr_0 = 2.5780e-04
Loss = 1.0102e-01, PNorm = 82.7390, GNorm = 0.7905, lr_0 = 2.5762e-04
Loss = 1.0511e-01, PNorm = 82.7457, GNorm = 0.6505, lr_0 = 2.5745e-04
Loss = 1.0666e-01, PNorm = 82.7527, GNorm = 0.4675, lr_0 = 2.5727e-04
Loss = 9.5178e-02, PNorm = 82.7594, GNorm = 0.8920, lr_0 = 2.5709e-04
Loss = 1.0133e-01, PNorm = 82.7651, GNorm = 0.8345, lr_0 = 2.5692e-04
Loss = 9.8648e-02, PNorm = 82.7692, GNorm = 0.8749, lr_0 = 2.5674e-04
Loss = 1.0107e-01, PNorm = 82.7747, GNorm = 0.6628, lr_0 = 2.5656e-04
Loss = 9.5591e-02, PNorm = 82.7786, GNorm = 0.6288, lr_0 = 2.5639e-04
Loss = 9.8564e-02, PNorm = 82.7817, GNorm = 0.7771, lr_0 = 2.5621e-04
Loss = 8.6408e-02, PNorm = 82.7872, GNorm = 0.9897, lr_0 = 2.5604e-04
Loss = 1.0463e-01, PNorm = 82.7924, GNorm = 0.6635, lr_0 = 2.5586e-04
Loss = 1.0564e-01, PNorm = 82.7974, GNorm = 1.0781, lr_0 = 2.5569e-04
Loss = 1.0500e-01, PNorm = 82.8025, GNorm = 0.5704, lr_0 = 2.5551e-04
Loss = 1.1533e-01, PNorm = 82.8090, GNorm = 0.5440, lr_0 = 2.5534e-04
Loss = 1.0905e-01, PNorm = 82.8133, GNorm = 0.9311, lr_0 = 2.5516e-04
Loss = 9.3005e-02, PNorm = 82.8159, GNorm = 0.5781, lr_0 = 2.5499e-04
Loss = 9.2990e-02, PNorm = 82.8215, GNorm = 0.7805, lr_0 = 2.5481e-04
Loss = 9.5994e-02, PNorm = 82.8268, GNorm = 0.7088, lr_0 = 2.5464e-04
Loss = 9.2390e-02, PNorm = 82.8328, GNorm = 0.4922, lr_0 = 2.5446e-04
Loss = 1.0052e-01, PNorm = 82.8375, GNorm = 0.6760, lr_0 = 2.5429e-04
Loss = 9.3046e-02, PNorm = 82.8432, GNorm = 0.5961, lr_0 = 2.5411e-04
Loss = 1.1140e-01, PNorm = 82.8496, GNorm = 1.1103, lr_0 = 2.5394e-04
Loss = 9.5770e-02, PNorm = 82.8553, GNorm = 0.7051, lr_0 = 2.5377e-04
Loss = 8.0772e-02, PNorm = 82.8599, GNorm = 0.4824, lr_0 = 2.5359e-04
Loss = 1.1032e-01, PNorm = 82.8658, GNorm = 0.7526, lr_0 = 2.5342e-04
Loss = 9.4624e-02, PNorm = 82.8685, GNorm = 0.5049, lr_0 = 2.5325e-04
Loss = 9.8648e-02, PNorm = 82.8716, GNorm = 0.7762, lr_0 = 2.5307e-04
Loss = 9.4652e-02, PNorm = 82.8751, GNorm = 0.6830, lr_0 = 2.5290e-04
Loss = 8.9945e-02, PNorm = 82.8779, GNorm = 0.6221, lr_0 = 2.5273e-04
Loss = 9.5512e-02, PNorm = 82.8845, GNorm = 0.6580, lr_0 = 2.5255e-04
Loss = 9.1629e-02, PNorm = 82.8929, GNorm = 0.5039, lr_0 = 2.5238e-04
Loss = 9.9099e-02, PNorm = 82.8962, GNorm = 0.5558, lr_0 = 2.5221e-04
Loss = 8.4723e-02, PNorm = 82.9037, GNorm = 0.5502, lr_0 = 2.5203e-04
Loss = 8.9518e-02, PNorm = 82.9103, GNorm = 0.6889, lr_0 = 2.5186e-04
Loss = 9.9113e-02, PNorm = 82.9130, GNorm = 0.5485, lr_0 = 2.5169e-04
Loss = 1.0321e-01, PNorm = 82.9139, GNorm = 0.9393, lr_0 = 2.5152e-04
Loss = 1.1868e-01, PNorm = 82.9171, GNorm = 0.6080, lr_0 = 2.5134e-04
Loss = 9.7614e-02, PNorm = 82.9212, GNorm = 0.8186, lr_0 = 2.5117e-04
Loss = 8.8322e-02, PNorm = 82.9258, GNorm = 0.5925, lr_0 = 2.5100e-04
Loss = 8.9376e-02, PNorm = 82.9295, GNorm = 0.7601, lr_0 = 2.5083e-04
Loss = 1.0218e-01, PNorm = 82.9361, GNorm = 0.9381, lr_0 = 2.5066e-04
Loss = 1.1281e-01, PNorm = 82.9460, GNorm = 0.8719, lr_0 = 2.5048e-04
Loss = 1.0408e-01, PNorm = 82.9544, GNorm = 0.7188, lr_0 = 2.5031e-04
Loss = 8.6026e-02, PNorm = 82.9601, GNorm = 0.7166, lr_0 = 2.5014e-04
Loss = 9.6606e-02, PNorm = 82.9680, GNorm = 0.6804, lr_0 = 2.4997e-04
Loss = 9.4304e-02, PNorm = 82.9731, GNorm = 0.6323, lr_0 = 2.4980e-04
Loss = 9.9118e-02, PNorm = 82.9735, GNorm = 0.5992, lr_0 = 2.4963e-04
Loss = 9.9309e-02, PNorm = 82.9769, GNorm = 0.7368, lr_0 = 2.4946e-04
Loss = 9.6418e-02, PNorm = 82.9820, GNorm = 0.7604, lr_0 = 2.4929e-04
Loss = 1.0237e-01, PNorm = 82.9879, GNorm = 0.6870, lr_0 = 2.4911e-04
Loss = 9.8882e-02, PNorm = 82.9935, GNorm = 0.5751, lr_0 = 2.4894e-04
Loss = 1.1413e-01, PNorm = 82.9999, GNorm = 0.8797, lr_0 = 2.4877e-04
Loss = 1.0749e-01, PNorm = 83.0025, GNorm = 0.6120, lr_0 = 2.4860e-04
Loss = 8.8963e-02, PNorm = 83.0080, GNorm = 0.5195, lr_0 = 2.4843e-04
Loss = 9.8035e-02, PNorm = 83.0128, GNorm = 0.6081, lr_0 = 2.4826e-04
Loss = 9.7599e-02, PNorm = 83.0196, GNorm = 0.7806, lr_0 = 2.4809e-04
Loss = 9.4804e-02, PNorm = 83.0250, GNorm = 0.5093, lr_0 = 2.4792e-04
Loss = 1.0326e-01, PNorm = 83.0310, GNorm = 0.6410, lr_0 = 2.4775e-04
Loss = 9.9120e-02, PNorm = 83.0365, GNorm = 0.7590, lr_0 = 2.4758e-04
Loss = 9.2841e-02, PNorm = 83.0429, GNorm = 0.5940, lr_0 = 2.4741e-04
Loss = 9.8307e-02, PNorm = 83.0500, GNorm = 1.0949, lr_0 = 2.4724e-04
Loss = 9.7072e-02, PNorm = 83.0542, GNorm = 0.6104, lr_0 = 2.4707e-04
Validation mae = 0.229649
Epoch 19
Loss = 8.2920e-02, PNorm = 83.0619, GNorm = 0.6825, lr_0 = 2.4690e-04
Loss = 9.0106e-02, PNorm = 83.0677, GNorm = 0.5810, lr_0 = 2.4674e-04
Loss = 9.2077e-02, PNorm = 83.0714, GNorm = 0.6640, lr_0 = 2.4657e-04
Loss = 8.8824e-02, PNorm = 83.0755, GNorm = 0.7607, lr_0 = 2.4640e-04
Loss = 8.8881e-02, PNorm = 83.0780, GNorm = 0.6142, lr_0 = 2.4623e-04
Loss = 9.6461e-02, PNorm = 83.0813, GNorm = 0.9388, lr_0 = 2.4606e-04
Loss = 9.6409e-02, PNorm = 83.0867, GNorm = 0.8036, lr_0 = 2.4589e-04
Loss = 9.8756e-02, PNorm = 83.0873, GNorm = 0.5601, lr_0 = 2.4572e-04
Loss = 8.9346e-02, PNorm = 83.0909, GNorm = 0.4973, lr_0 = 2.4556e-04
Loss = 9.0734e-02, PNorm = 83.0962, GNorm = 0.6202, lr_0 = 2.4539e-04
Loss = 8.8344e-02, PNorm = 83.1011, GNorm = 0.5276, lr_0 = 2.4522e-04
Loss = 9.3974e-02, PNorm = 83.1081, GNorm = 0.7582, lr_0 = 2.4505e-04
Loss = 9.1162e-02, PNorm = 83.1114, GNorm = 0.5792, lr_0 = 2.4488e-04
Loss = 9.7399e-02, PNorm = 83.1150, GNorm = 0.6504, lr_0 = 2.4472e-04
Loss = 8.3182e-02, PNorm = 83.1203, GNorm = 0.6282, lr_0 = 2.4455e-04
Loss = 9.6160e-02, PNorm = 83.1245, GNorm = 0.6757, lr_0 = 2.4438e-04
Loss = 8.9119e-02, PNorm = 83.1298, GNorm = 0.5520, lr_0 = 2.4421e-04
Loss = 9.6580e-02, PNorm = 83.1378, GNorm = 1.1073, lr_0 = 2.4405e-04
Loss = 8.4087e-02, PNorm = 83.1451, GNorm = 0.6208, lr_0 = 2.4388e-04
Loss = 8.8317e-02, PNorm = 83.1490, GNorm = 0.4531, lr_0 = 2.4371e-04
Loss = 7.9645e-02, PNorm = 83.1555, GNorm = 0.4401, lr_0 = 2.4354e-04
Loss = 9.0838e-02, PNorm = 83.1584, GNorm = 0.9842, lr_0 = 2.4338e-04
Loss = 9.7680e-02, PNorm = 83.1613, GNorm = 0.6761, lr_0 = 2.4321e-04
Loss = 9.5191e-02, PNorm = 83.1651, GNorm = 0.7306, lr_0 = 2.4304e-04
Loss = 1.1397e-01, PNorm = 83.1707, GNorm = 1.1419, lr_0 = 2.4288e-04
Loss = 9.2771e-02, PNorm = 83.1769, GNorm = 0.7267, lr_0 = 2.4271e-04
Loss = 8.9753e-02, PNorm = 83.1790, GNorm = 0.6596, lr_0 = 2.4254e-04
Loss = 9.8673e-02, PNorm = 83.1787, GNorm = 0.6993, lr_0 = 2.4238e-04
Loss = 8.8481e-02, PNorm = 83.1849, GNorm = 0.8138, lr_0 = 2.4221e-04
Loss = 9.9450e-02, PNorm = 83.1907, GNorm = 0.6150, lr_0 = 2.4205e-04
Loss = 8.7719e-02, PNorm = 83.1957, GNorm = 0.7019, lr_0 = 2.4188e-04
Loss = 1.1384e-01, PNorm = 83.2018, GNorm = 0.5264, lr_0 = 2.4171e-04
Loss = 9.1150e-02, PNorm = 83.2098, GNorm = 0.7063, lr_0 = 2.4155e-04
Loss = 9.0542e-02, PNorm = 83.2132, GNorm = 0.6739, lr_0 = 2.4138e-04
Loss = 1.0622e-01, PNorm = 83.2178, GNorm = 0.8604, lr_0 = 2.4122e-04
Loss = 9.0998e-02, PNorm = 83.2224, GNorm = 0.6304, lr_0 = 2.4105e-04
Loss = 1.0338e-01, PNorm = 83.2274, GNorm = 0.5902, lr_0 = 2.4089e-04
Loss = 9.3039e-02, PNorm = 83.2326, GNorm = 0.8823, lr_0 = 2.4072e-04
Loss = 8.8468e-02, PNorm = 83.2403, GNorm = 0.4386, lr_0 = 2.4056e-04
Loss = 9.4363e-02, PNorm = 83.2458, GNorm = 0.6534, lr_0 = 2.4039e-04
Loss = 9.3535e-02, PNorm = 83.2525, GNorm = 0.9511, lr_0 = 2.4023e-04
Loss = 1.0069e-01, PNorm = 83.2572, GNorm = 0.6961, lr_0 = 2.4006e-04
Loss = 8.4490e-02, PNorm = 83.2652, GNorm = 0.5234, lr_0 = 2.3990e-04
Loss = 8.5571e-02, PNorm = 83.2714, GNorm = 0.7300, lr_0 = 2.3974e-04
Loss = 9.5245e-02, PNorm = 83.2730, GNorm = 0.5746, lr_0 = 2.3957e-04
Loss = 1.0512e-01, PNorm = 83.2756, GNorm = 0.6285, lr_0 = 2.3941e-04
Loss = 8.0610e-02, PNorm = 83.2796, GNorm = 0.5670, lr_0 = 2.3924e-04
Loss = 8.5464e-02, PNorm = 83.2827, GNorm = 0.8156, lr_0 = 2.3908e-04
Loss = 1.0495e-01, PNorm = 83.2854, GNorm = 0.6869, lr_0 = 2.3892e-04
Loss = 9.2330e-02, PNorm = 83.2879, GNorm = 0.6330, lr_0 = 2.3875e-04
Loss = 8.3627e-02, PNorm = 83.2905, GNorm = 0.6322, lr_0 = 2.3859e-04
Loss = 1.0293e-01, PNorm = 83.2908, GNorm = 0.7636, lr_0 = 2.3842e-04
Loss = 9.2111e-02, PNorm = 83.2923, GNorm = 0.7413, lr_0 = 2.3826e-04
Loss = 9.5823e-02, PNorm = 83.2936, GNorm = 0.5047, lr_0 = 2.3810e-04
Loss = 1.0015e-01, PNorm = 83.2994, GNorm = 0.9559, lr_0 = 2.3794e-04
Loss = 1.0141e-01, PNorm = 83.3050, GNorm = 0.9025, lr_0 = 2.3777e-04
Loss = 8.4582e-02, PNorm = 83.3099, GNorm = 0.5200, lr_0 = 2.3761e-04
Loss = 9.8111e-02, PNorm = 83.3168, GNorm = 0.5653, lr_0 = 2.3745e-04
Loss = 9.8191e-02, PNorm = 83.3216, GNorm = 0.8386, lr_0 = 2.3728e-04
Loss = 1.0821e-01, PNorm = 83.3261, GNorm = 0.8432, lr_0 = 2.3712e-04
Loss = 9.3339e-02, PNorm = 83.3338, GNorm = 0.5770, lr_0 = 2.3696e-04
Loss = 1.1510e-01, PNorm = 83.3398, GNorm = 0.6748, lr_0 = 2.3680e-04
Loss = 8.0319e-02, PNorm = 83.3426, GNorm = 0.5363, lr_0 = 2.3663e-04
Loss = 9.4982e-02, PNorm = 83.3454, GNorm = 0.6999, lr_0 = 2.3647e-04
Loss = 9.3126e-02, PNorm = 83.3485, GNorm = 0.6666, lr_0 = 2.3631e-04
Loss = 9.1912e-02, PNorm = 83.3524, GNorm = 0.6339, lr_0 = 2.3615e-04
Loss = 9.4510e-02, PNorm = 83.3555, GNorm = 0.5618, lr_0 = 2.3599e-04
Loss = 8.4568e-02, PNorm = 83.3608, GNorm = 0.6277, lr_0 = 2.3582e-04
Loss = 1.0063e-01, PNorm = 83.3662, GNorm = 0.7952, lr_0 = 2.3566e-04
Loss = 9.4055e-02, PNorm = 83.3720, GNorm = 0.7539, lr_0 = 2.3550e-04
Loss = 1.1089e-01, PNorm = 83.3765, GNorm = 0.8856, lr_0 = 2.3534e-04
Loss = 1.0742e-01, PNorm = 83.3801, GNorm = 0.5185, lr_0 = 2.3518e-04
Loss = 9.9397e-02, PNorm = 83.3850, GNorm = 0.6106, lr_0 = 2.3502e-04
Loss = 1.1149e-01, PNorm = 83.3899, GNorm = 1.0429, lr_0 = 2.3486e-04
Loss = 9.8776e-02, PNorm = 83.3938, GNorm = 0.5840, lr_0 = 2.3470e-04
Loss = 7.4589e-02, PNorm = 83.3973, GNorm = 0.5194, lr_0 = 2.3454e-04
Loss = 8.6085e-02, PNorm = 83.4020, GNorm = 0.5702, lr_0 = 2.3437e-04
Loss = 1.0586e-01, PNorm = 83.4069, GNorm = 0.8662, lr_0 = 2.3421e-04
Loss = 1.1207e-01, PNorm = 83.4103, GNorm = 0.7312, lr_0 = 2.3405e-04
Loss = 1.0835e-01, PNorm = 83.4125, GNorm = 0.5674, lr_0 = 2.3389e-04
Loss = 9.9576e-02, PNorm = 83.4179, GNorm = 0.6637, lr_0 = 2.3373e-04
Loss = 9.6000e-02, PNorm = 83.4253, GNorm = 0.6112, lr_0 = 2.3357e-04
Loss = 8.7677e-02, PNorm = 83.4328, GNorm = 0.7732, lr_0 = 2.3341e-04
Loss = 9.0982e-02, PNorm = 83.4393, GNorm = 0.5905, lr_0 = 2.3325e-04
Loss = 8.9450e-02, PNorm = 83.4467, GNorm = 0.6430, lr_0 = 2.3309e-04
Loss = 9.9702e-02, PNorm = 83.4524, GNorm = 0.5732, lr_0 = 2.3293e-04
Loss = 1.0072e-01, PNorm = 83.4547, GNorm = 0.6672, lr_0 = 2.3277e-04
Loss = 9.3981e-02, PNorm = 83.4616, GNorm = 0.6204, lr_0 = 2.3261e-04
Loss = 8.6102e-02, PNorm = 83.4666, GNorm = 0.6204, lr_0 = 2.3246e-04
Loss = 9.3479e-02, PNorm = 83.4699, GNorm = 0.6239, lr_0 = 2.3230e-04
Loss = 9.1488e-02, PNorm = 83.4781, GNorm = 0.6411, lr_0 = 2.3214e-04
Loss = 1.0170e-01, PNorm = 83.4860, GNorm = 0.5842, lr_0 = 2.3198e-04
Loss = 9.0323e-02, PNorm = 83.4947, GNorm = 0.7882, lr_0 = 2.3182e-04
Loss = 8.9383e-02, PNorm = 83.5009, GNorm = 0.6528, lr_0 = 2.3166e-04
Loss = 9.8522e-02, PNorm = 83.5047, GNorm = 0.6002, lr_0 = 2.3150e-04
Loss = 9.0400e-02, PNorm = 83.5108, GNorm = 0.5512, lr_0 = 2.3134e-04
Loss = 8.3945e-02, PNorm = 83.5127, GNorm = 0.5390, lr_0 = 2.3118e-04
Loss = 8.9365e-02, PNorm = 83.5178, GNorm = 0.4344, lr_0 = 2.3103e-04
Loss = 9.5174e-02, PNorm = 83.5245, GNorm = 0.8632, lr_0 = 2.3087e-04
Loss = 1.0953e-01, PNorm = 83.5263, GNorm = 0.8575, lr_0 = 2.3071e-04
Loss = 9.9936e-02, PNorm = 83.5311, GNorm = 0.6667, lr_0 = 2.3055e-04
Loss = 8.7457e-02, PNorm = 83.5353, GNorm = 0.8459, lr_0 = 2.3039e-04
Loss = 1.0463e-01, PNorm = 83.5375, GNorm = 1.1047, lr_0 = 2.3024e-04
Loss = 8.4148e-02, PNorm = 83.5406, GNorm = 0.4625, lr_0 = 2.3008e-04
Loss = 8.9160e-02, PNorm = 83.5456, GNorm = 0.6245, lr_0 = 2.2992e-04
Loss = 8.5437e-02, PNorm = 83.5492, GNorm = 0.5845, lr_0 = 2.2976e-04
Loss = 1.0342e-01, PNorm = 83.5527, GNorm = 0.6886, lr_0 = 2.2961e-04
Loss = 9.1315e-02, PNorm = 83.5567, GNorm = 0.4685, lr_0 = 2.2945e-04
Loss = 9.4179e-02, PNorm = 83.5589, GNorm = 0.6600, lr_0 = 2.2929e-04
Loss = 8.5968e-02, PNorm = 83.5632, GNorm = 0.5962, lr_0 = 2.2913e-04
Loss = 8.2720e-02, PNorm = 83.5692, GNorm = 0.7066, lr_0 = 2.2898e-04
Loss = 8.8624e-02, PNorm = 83.5731, GNorm = 0.6700, lr_0 = 2.2882e-04
Loss = 9.8770e-02, PNorm = 83.5761, GNorm = 0.6154, lr_0 = 2.2866e-04
Loss = 9.2737e-02, PNorm = 83.5817, GNorm = 0.5755, lr_0 = 2.2851e-04
Loss = 9.9923e-02, PNorm = 83.5875, GNorm = 0.7016, lr_0 = 2.2835e-04
Loss = 8.2387e-02, PNorm = 83.5933, GNorm = 0.6613, lr_0 = 2.2819e-04
Loss = 9.5925e-02, PNorm = 83.5937, GNorm = 0.5607, lr_0 = 2.2804e-04
Loss = 9.4337e-02, PNorm = 83.5939, GNorm = 0.6429, lr_0 = 2.2788e-04
Loss = 8.7994e-02, PNorm = 83.5975, GNorm = 0.4972, lr_0 = 2.2773e-04
Loss = 9.8365e-02, PNorm = 83.5997, GNorm = 0.8111, lr_0 = 2.2757e-04
Validation mae = 0.229225
Epoch 20
Loss = 8.6334e-02, PNorm = 83.6035, GNorm = 0.9573, lr_0 = 2.2741e-04
Loss = 9.2014e-02, PNorm = 83.6085, GNorm = 0.9432, lr_0 = 2.2726e-04
Loss = 9.2326e-02, PNorm = 83.6128, GNorm = 0.7692, lr_0 = 2.2710e-04
Loss = 9.2495e-02, PNorm = 83.6197, GNorm = 0.5256, lr_0 = 2.2695e-04
Loss = 9.6428e-02, PNorm = 83.6246, GNorm = 0.6683, lr_0 = 2.2679e-04
Loss = 8.9125e-02, PNorm = 83.6256, GNorm = 0.7052, lr_0 = 2.2664e-04
Loss = 8.4066e-02, PNorm = 83.6288, GNorm = 0.6362, lr_0 = 2.2648e-04
Loss = 9.2647e-02, PNorm = 83.6326, GNorm = 0.6738, lr_0 = 2.2632e-04
Loss = 9.8520e-02, PNorm = 83.6334, GNorm = 0.6845, lr_0 = 2.2617e-04
Loss = 8.7421e-02, PNorm = 83.6395, GNorm = 0.8411, lr_0 = 2.2601e-04
Loss = 1.0815e-01, PNorm = 83.6454, GNorm = 0.7010, lr_0 = 2.2586e-04
Loss = 9.9113e-02, PNorm = 83.6497, GNorm = 0.6873, lr_0 = 2.2571e-04
Loss = 8.0935e-02, PNorm = 83.6530, GNorm = 0.7251, lr_0 = 2.2555e-04
Loss = 9.6254e-02, PNorm = 83.6565, GNorm = 0.6332, lr_0 = 2.2540e-04
Loss = 8.7044e-02, PNorm = 83.6608, GNorm = 0.6213, lr_0 = 2.2524e-04
Loss = 8.7739e-02, PNorm = 83.6656, GNorm = 0.6603, lr_0 = 2.2509e-04
Loss = 7.9651e-02, PNorm = 83.6666, GNorm = 0.6517, lr_0 = 2.2493e-04
Loss = 8.8677e-02, PNorm = 83.6698, GNorm = 0.6302, lr_0 = 2.2478e-04
Loss = 8.7520e-02, PNorm = 83.6758, GNorm = 0.6753, lr_0 = 2.2463e-04
Loss = 8.9112e-02, PNorm = 83.6822, GNorm = 0.6618, lr_0 = 2.2447e-04
Loss = 8.4744e-02, PNorm = 83.6889, GNorm = 0.6850, lr_0 = 2.2432e-04
Loss = 7.4716e-02, PNorm = 83.6951, GNorm = 0.6629, lr_0 = 2.2416e-04
Loss = 8.6685e-02, PNorm = 83.6992, GNorm = 0.5004, lr_0 = 2.2401e-04
Loss = 9.1945e-02, PNorm = 83.7051, GNorm = 0.5726, lr_0 = 2.2386e-04
Loss = 9.4224e-02, PNorm = 83.7080, GNorm = 0.8024, lr_0 = 2.2370e-04
Loss = 8.4658e-02, PNorm = 83.7145, GNorm = 0.6148, lr_0 = 2.2355e-04
Loss = 8.8698e-02, PNorm = 83.7190, GNorm = 0.5100, lr_0 = 2.2340e-04
Loss = 9.7695e-02, PNorm = 83.7272, GNorm = 0.8419, lr_0 = 2.2324e-04
Loss = 1.0106e-01, PNorm = 83.7320, GNorm = 0.6086, lr_0 = 2.2309e-04
Loss = 8.3778e-02, PNorm = 83.7298, GNorm = 0.5859, lr_0 = 2.2294e-04
Loss = 7.8885e-02, PNorm = 83.7311, GNorm = 0.6284, lr_0 = 2.2279e-04
Loss = 9.6736e-02, PNorm = 83.7379, GNorm = 0.8656, lr_0 = 2.2263e-04
Loss = 9.7376e-02, PNorm = 83.7459, GNorm = 0.5739, lr_0 = 2.2248e-04
Loss = 9.9088e-02, PNorm = 83.7543, GNorm = 0.4550, lr_0 = 2.2233e-04
Loss = 9.6168e-02, PNorm = 83.7601, GNorm = 0.6058, lr_0 = 2.2218e-04
Loss = 8.4885e-02, PNorm = 83.7635, GNorm = 0.7252, lr_0 = 2.2202e-04
Loss = 9.1774e-02, PNorm = 83.7677, GNorm = 0.8432, lr_0 = 2.2187e-04
Loss = 1.0007e-01, PNorm = 83.7726, GNorm = 0.5313, lr_0 = 2.2172e-04
Loss = 9.4691e-02, PNorm = 83.7772, GNorm = 0.5415, lr_0 = 2.2157e-04
Loss = 9.7926e-02, PNorm = 83.7824, GNorm = 0.7456, lr_0 = 2.2142e-04
Loss = 7.9304e-02, PNorm = 83.7860, GNorm = 0.5894, lr_0 = 2.2126e-04
Loss = 9.2010e-02, PNorm = 83.7903, GNorm = 0.5957, lr_0 = 2.2111e-04
Loss = 9.1109e-02, PNorm = 83.7936, GNorm = 0.6487, lr_0 = 2.2096e-04
Loss = 9.5946e-02, PNorm = 83.7948, GNorm = 1.0762, lr_0 = 2.2081e-04
Loss = 1.0498e-01, PNorm = 83.7980, GNorm = 0.6317, lr_0 = 2.2066e-04
Loss = 9.0590e-02, PNorm = 83.8000, GNorm = 0.5983, lr_0 = 2.2051e-04
Loss = 8.9105e-02, PNorm = 83.8030, GNorm = 0.5978, lr_0 = 2.2036e-04
Loss = 8.7022e-02, PNorm = 83.8067, GNorm = 0.5496, lr_0 = 2.2021e-04
Loss = 8.2504e-02, PNorm = 83.8129, GNorm = 0.5567, lr_0 = 2.2005e-04
Loss = 8.1735e-02, PNorm = 83.8168, GNorm = 0.7765, lr_0 = 2.1990e-04
Loss = 9.0767e-02, PNorm = 83.8219, GNorm = 0.5545, lr_0 = 2.1975e-04
Loss = 8.4254e-02, PNorm = 83.8248, GNorm = 0.5088, lr_0 = 2.1960e-04
Loss = 9.5285e-02, PNorm = 83.8275, GNorm = 0.7938, lr_0 = 2.1945e-04
Loss = 9.5714e-02, PNorm = 83.8316, GNorm = 0.8146, lr_0 = 2.1930e-04
Loss = 8.8692e-02, PNorm = 83.8340, GNorm = 0.6665, lr_0 = 2.1915e-04
Loss = 8.7264e-02, PNorm = 83.8367, GNorm = 0.6102, lr_0 = 2.1900e-04
Loss = 8.6298e-02, PNorm = 83.8424, GNorm = 0.8114, lr_0 = 2.1885e-04
Loss = 9.4389e-02, PNorm = 83.8476, GNorm = 0.5231, lr_0 = 2.1870e-04
Loss = 9.1020e-02, PNorm = 83.8468, GNorm = 0.8881, lr_0 = 2.1855e-04
Loss = 9.9155e-02, PNorm = 83.8446, GNorm = 0.7277, lr_0 = 2.1840e-04
Loss = 8.7737e-02, PNorm = 83.8508, GNorm = 0.7958, lr_0 = 2.1825e-04
Loss = 9.0469e-02, PNorm = 83.8560, GNorm = 0.5551, lr_0 = 2.1810e-04
Loss = 8.8831e-02, PNorm = 83.8600, GNorm = 0.6844, lr_0 = 2.1795e-04
Loss = 9.1211e-02, PNorm = 83.8645, GNorm = 0.9583, lr_0 = 2.1780e-04
Loss = 1.0656e-01, PNorm = 83.8695, GNorm = 0.6527, lr_0 = 2.1765e-04
Loss = 9.8559e-02, PNorm = 83.8751, GNorm = 0.7361, lr_0 = 2.1751e-04
Loss = 8.3826e-02, PNorm = 83.8800, GNorm = 0.4357, lr_0 = 2.1736e-04
Loss = 1.0017e-01, PNorm = 83.8825, GNorm = 0.8033, lr_0 = 2.1721e-04
Loss = 9.0573e-02, PNorm = 83.8853, GNorm = 0.6186, lr_0 = 2.1706e-04
Loss = 1.0458e-01, PNorm = 83.8883, GNorm = 0.4730, lr_0 = 2.1691e-04
Loss = 8.9729e-02, PNorm = 83.8910, GNorm = 0.5366, lr_0 = 2.1676e-04
Loss = 8.4876e-02, PNorm = 83.8936, GNorm = 0.6084, lr_0 = 2.1661e-04
Loss = 1.0172e-01, PNorm = 83.8964, GNorm = 0.6084, lr_0 = 2.1646e-04
Loss = 8.8885e-02, PNorm = 83.8998, GNorm = 0.4630, lr_0 = 2.1632e-04
Loss = 9.2316e-02, PNorm = 83.9031, GNorm = 0.6497, lr_0 = 2.1617e-04
Loss = 8.4313e-02, PNorm = 83.9073, GNorm = 0.6389, lr_0 = 2.1602e-04
Loss = 8.5756e-02, PNorm = 83.9107, GNorm = 0.6908, lr_0 = 2.1587e-04
Loss = 9.9289e-02, PNorm = 83.9156, GNorm = 0.5156, lr_0 = 2.1572e-04
Loss = 9.5775e-02, PNorm = 83.9196, GNorm = 0.5690, lr_0 = 2.1558e-04
Loss = 7.7624e-02, PNorm = 83.9240, GNorm = 0.4793, lr_0 = 2.1543e-04
Loss = 9.2929e-02, PNorm = 83.9275, GNorm = 0.7173, lr_0 = 2.1528e-04
Loss = 9.0834e-02, PNorm = 83.9302, GNorm = 0.6508, lr_0 = 2.1513e-04
Loss = 8.5346e-02, PNorm = 83.9371, GNorm = 0.6515, lr_0 = 2.1499e-04
Loss = 9.3784e-02, PNorm = 83.9431, GNorm = 0.6891, lr_0 = 2.1484e-04
Loss = 1.0436e-01, PNorm = 83.9482, GNorm = 0.7641, lr_0 = 2.1469e-04
Loss = 9.8028e-02, PNorm = 83.9521, GNorm = 0.6432, lr_0 = 2.1454e-04
Loss = 8.6073e-02, PNorm = 83.9552, GNorm = 0.5900, lr_0 = 2.1440e-04
Loss = 8.2499e-02, PNorm = 83.9574, GNorm = 0.8171, lr_0 = 2.1425e-04
Loss = 1.0273e-01, PNorm = 83.9569, GNorm = 0.8484, lr_0 = 2.1410e-04
Loss = 8.8952e-02, PNorm = 83.9591, GNorm = 0.6712, lr_0 = 2.1396e-04
Loss = 8.1456e-02, PNorm = 83.9635, GNorm = 0.5553, lr_0 = 2.1381e-04
Loss = 9.2497e-02, PNorm = 83.9705, GNorm = 0.6840, lr_0 = 2.1366e-04
Loss = 9.0534e-02, PNorm = 83.9738, GNorm = 0.7013, lr_0 = 2.1352e-04
Loss = 9.1826e-02, PNorm = 83.9786, GNorm = 0.7781, lr_0 = 2.1337e-04
Loss = 9.0184e-02, PNorm = 83.9826, GNorm = 0.5675, lr_0 = 2.1323e-04
Loss = 9.1667e-02, PNorm = 83.9848, GNorm = 0.5612, lr_0 = 2.1308e-04
Loss = 9.7785e-02, PNorm = 83.9861, GNorm = 0.5844, lr_0 = 2.1293e-04
Loss = 8.5823e-02, PNorm = 83.9905, GNorm = 0.6143, lr_0 = 2.1279e-04
Loss = 8.8940e-02, PNorm = 83.9916, GNorm = 0.6425, lr_0 = 2.1264e-04
Loss = 1.0377e-01, PNorm = 83.9947, GNorm = 0.7714, lr_0 = 2.1250e-04
Loss = 1.1068e-01, PNorm = 83.9995, GNorm = 0.6374, lr_0 = 2.1235e-04
Loss = 8.8832e-02, PNorm = 84.0040, GNorm = 0.5704, lr_0 = 2.1221e-04
Loss = 7.7720e-02, PNorm = 84.0060, GNorm = 0.5716, lr_0 = 2.1206e-04
Loss = 8.9067e-02, PNorm = 84.0117, GNorm = 0.6127, lr_0 = 2.1191e-04
Loss = 1.0111e-01, PNorm = 84.0203, GNorm = 0.8250, lr_0 = 2.1177e-04
Loss = 9.8276e-02, PNorm = 84.0235, GNorm = 0.7188, lr_0 = 2.1162e-04
Loss = 9.0173e-02, PNorm = 84.0297, GNorm = 0.5257, lr_0 = 2.1148e-04
Loss = 9.7519e-02, PNorm = 84.0327, GNorm = 0.7712, lr_0 = 2.1133e-04
Loss = 9.1775e-02, PNorm = 84.0336, GNorm = 0.6101, lr_0 = 2.1119e-04
Loss = 8.8315e-02, PNorm = 84.0370, GNorm = 0.5602, lr_0 = 2.1104e-04
Loss = 9.7258e-02, PNorm = 84.0420, GNorm = 0.6657, lr_0 = 2.1090e-04
Loss = 9.0730e-02, PNorm = 84.0447, GNorm = 0.5859, lr_0 = 2.1076e-04
Loss = 9.4433e-02, PNorm = 84.0449, GNorm = 0.5947, lr_0 = 2.1061e-04
Loss = 9.5738e-02, PNorm = 84.0481, GNorm = 0.7001, lr_0 = 2.1047e-04
Loss = 9.3983e-02, PNorm = 84.0531, GNorm = 0.7880, lr_0 = 2.1032e-04
Loss = 9.4194e-02, PNorm = 84.0589, GNorm = 0.7340, lr_0 = 2.1018e-04
Loss = 9.7386e-02, PNorm = 84.0634, GNorm = 0.5270, lr_0 = 2.1003e-04
Loss = 8.6582e-02, PNorm = 84.0662, GNorm = 0.7850, lr_0 = 2.0989e-04
Loss = 9.6368e-02, PNorm = 84.0681, GNorm = 0.6518, lr_0 = 2.0975e-04
Loss = 9.0425e-02, PNorm = 84.0710, GNorm = 0.6902, lr_0 = 2.0960e-04
Validation mae = 0.227334
Epoch 21
Loss = 8.3130e-02, PNorm = 84.0743, GNorm = 0.5039, lr_0 = 2.0946e-04
Loss = 7.3756e-02, PNorm = 84.0783, GNorm = 0.6019, lr_0 = 2.0932e-04
Loss = 9.1784e-02, PNorm = 84.0828, GNorm = 0.6524, lr_0 = 2.0917e-04
Loss = 8.4916e-02, PNorm = 84.0873, GNorm = 0.7368, lr_0 = 2.0903e-04
Loss = 7.7045e-02, PNorm = 84.0903, GNorm = 0.6942, lr_0 = 2.0889e-04
Loss = 7.4300e-02, PNorm = 84.0922, GNorm = 0.7548, lr_0 = 2.0874e-04
Loss = 9.3403e-02, PNorm = 84.0962, GNorm = 0.5307, lr_0 = 2.0860e-04
Loss = 8.0656e-02, PNorm = 84.0995, GNorm = 0.5199, lr_0 = 2.0846e-04
Loss = 8.8485e-02, PNorm = 84.1028, GNorm = 0.6951, lr_0 = 2.0831e-04
Loss = 9.8263e-02, PNorm = 84.1078, GNorm = 1.0316, lr_0 = 2.0817e-04
Loss = 8.6163e-02, PNorm = 84.1110, GNorm = 0.5471, lr_0 = 2.0803e-04
Loss = 8.3495e-02, PNorm = 84.1155, GNorm = 0.7092, lr_0 = 2.0789e-04
Loss = 9.6718e-02, PNorm = 84.1224, GNorm = 0.7400, lr_0 = 2.0774e-04
Loss = 8.4158e-02, PNorm = 84.1282, GNorm = 0.6313, lr_0 = 2.0760e-04
Loss = 8.6345e-02, PNorm = 84.1332, GNorm = 0.5012, lr_0 = 2.0746e-04
Loss = 9.6422e-02, PNorm = 84.1409, GNorm = 0.6515, lr_0 = 2.0732e-04
Loss = 8.6311e-02, PNorm = 84.1454, GNorm = 0.4356, lr_0 = 2.0718e-04
Loss = 8.1049e-02, PNorm = 84.1515, GNorm = 0.5830, lr_0 = 2.0703e-04
Loss = 8.7096e-02, PNorm = 84.1564, GNorm = 0.6091, lr_0 = 2.0689e-04
Loss = 9.4624e-02, PNorm = 84.1585, GNorm = 0.7757, lr_0 = 2.0675e-04
Loss = 8.3858e-02, PNorm = 84.1610, GNorm = 0.5811, lr_0 = 2.0661e-04
Loss = 7.5143e-02, PNorm = 84.1632, GNorm = 0.6600, lr_0 = 2.0647e-04
Loss = 7.7411e-02, PNorm = 84.1653, GNorm = 0.6549, lr_0 = 2.0633e-04
Loss = 9.0156e-02, PNorm = 84.1673, GNorm = 0.7330, lr_0 = 2.0618e-04
Loss = 8.9477e-02, PNorm = 84.1710, GNorm = 0.6870, lr_0 = 2.0604e-04
Loss = 9.1098e-02, PNorm = 84.1749, GNorm = 0.6192, lr_0 = 2.0590e-04
Loss = 8.7159e-02, PNorm = 84.1748, GNorm = 0.7339, lr_0 = 2.0576e-04
Loss = 8.8213e-02, PNorm = 84.1787, GNorm = 0.5253, lr_0 = 2.0562e-04
Loss = 9.0489e-02, PNorm = 84.1847, GNorm = 0.4939, lr_0 = 2.0548e-04
Loss = 7.9939e-02, PNorm = 84.1880, GNorm = 0.9843, lr_0 = 2.0534e-04
Loss = 8.3400e-02, PNorm = 84.1898, GNorm = 0.5885, lr_0 = 2.0520e-04
Loss = 9.2735e-02, PNorm = 84.1954, GNorm = 0.5713, lr_0 = 2.0506e-04
Loss = 9.0310e-02, PNorm = 84.1969, GNorm = 0.5977, lr_0 = 2.0492e-04
Loss = 9.7212e-02, PNorm = 84.1979, GNorm = 0.6130, lr_0 = 2.0478e-04
Loss = 9.1415e-02, PNorm = 84.2009, GNorm = 0.6422, lr_0 = 2.0464e-04
Loss = 9.0339e-02, PNorm = 84.2038, GNorm = 0.7015, lr_0 = 2.0450e-04
Loss = 9.2112e-02, PNorm = 84.2093, GNorm = 0.5606, lr_0 = 2.0436e-04
Loss = 9.3866e-02, PNorm = 84.2136, GNorm = 0.7419, lr_0 = 2.0422e-04
Loss = 8.5965e-02, PNorm = 84.2177, GNorm = 0.4697, lr_0 = 2.0408e-04
Loss = 1.0106e-01, PNorm = 84.2229, GNorm = 0.5679, lr_0 = 2.0394e-04
Loss = 9.5091e-02, PNorm = 84.2274, GNorm = 0.7819, lr_0 = 2.0380e-04
Loss = 8.1813e-02, PNorm = 84.2333, GNorm = 0.5880, lr_0 = 2.0366e-04
Loss = 8.5449e-02, PNorm = 84.2376, GNorm = 0.5397, lr_0 = 2.0352e-04
Loss = 9.7512e-02, PNorm = 84.2402, GNorm = 0.7856, lr_0 = 2.0338e-04
Loss = 8.2621e-02, PNorm = 84.2445, GNorm = 0.5913, lr_0 = 2.0324e-04
Loss = 8.8569e-02, PNorm = 84.2492, GNorm = 0.6673, lr_0 = 2.0310e-04
Loss = 8.1103e-02, PNorm = 84.2554, GNorm = 0.6237, lr_0 = 2.0296e-04
Loss = 1.0044e-01, PNorm = 84.2609, GNorm = 0.6275, lr_0 = 2.0282e-04
Loss = 8.3872e-02, PNorm = 84.2637, GNorm = 0.7330, lr_0 = 2.0268e-04
Loss = 9.6277e-02, PNorm = 84.2667, GNorm = 0.7643, lr_0 = 2.0254e-04
Loss = 9.4370e-02, PNorm = 84.2669, GNorm = 0.6656, lr_0 = 2.0240e-04
Loss = 8.4113e-02, PNorm = 84.2680, GNorm = 1.0219, lr_0 = 2.0227e-04
Loss = 1.0535e-01, PNorm = 84.2728, GNorm = 0.6532, lr_0 = 2.0213e-04
Loss = 8.8701e-02, PNorm = 84.2805, GNorm = 0.6535, lr_0 = 2.0199e-04
Loss = 1.0806e-01, PNorm = 84.2865, GNorm = 0.6330, lr_0 = 2.0185e-04
Loss = 8.6910e-02, PNorm = 84.2893, GNorm = 0.5736, lr_0 = 2.0171e-04
Loss = 9.1693e-02, PNorm = 84.2930, GNorm = 0.8005, lr_0 = 2.0157e-04
Loss = 9.4562e-02, PNorm = 84.2942, GNorm = 0.6628, lr_0 = 2.0144e-04
Loss = 9.0546e-02, PNorm = 84.2991, GNorm = 0.5405, lr_0 = 2.0130e-04
Loss = 9.9099e-02, PNorm = 84.3033, GNorm = 0.6204, lr_0 = 2.0116e-04
Loss = 9.1315e-02, PNorm = 84.3070, GNorm = 0.7192, lr_0 = 2.0102e-04
Loss = 9.3592e-02, PNorm = 84.3119, GNorm = 0.8470, lr_0 = 2.0088e-04
Loss = 8.1985e-02, PNorm = 84.3165, GNorm = 0.5961, lr_0 = 2.0075e-04
Loss = 9.5103e-02, PNorm = 84.3202, GNorm = 0.7306, lr_0 = 2.0061e-04
Loss = 8.3733e-02, PNorm = 84.3226, GNorm = 0.5123, lr_0 = 2.0047e-04
Loss = 9.6245e-02, PNorm = 84.3260, GNorm = 0.7803, lr_0 = 2.0033e-04
Loss = 9.2533e-02, PNorm = 84.3289, GNorm = 0.8247, lr_0 = 2.0020e-04
Loss = 8.4592e-02, PNorm = 84.3314, GNorm = 0.7144, lr_0 = 2.0006e-04
Loss = 8.7457e-02, PNorm = 84.3340, GNorm = 0.7332, lr_0 = 1.9992e-04
Loss = 9.4925e-02, PNorm = 84.3398, GNorm = 0.5722, lr_0 = 1.9979e-04
Loss = 1.0169e-01, PNorm = 84.3442, GNorm = 0.6012, lr_0 = 1.9965e-04
Loss = 8.7695e-02, PNorm = 84.3499, GNorm = 0.7151, lr_0 = 1.9951e-04
Loss = 8.4821e-02, PNorm = 84.3534, GNorm = 0.7774, lr_0 = 1.9938e-04
Loss = 8.5275e-02, PNorm = 84.3552, GNorm = 0.7296, lr_0 = 1.9924e-04
Loss = 8.2561e-02, PNorm = 84.3557, GNorm = 0.5488, lr_0 = 1.9910e-04
Loss = 9.3666e-02, PNorm = 84.3603, GNorm = 0.5655, lr_0 = 1.9897e-04
Loss = 1.0021e-01, PNorm = 84.3652, GNorm = 0.8993, lr_0 = 1.9883e-04
Loss = 8.5516e-02, PNorm = 84.3700, GNorm = 0.7637, lr_0 = 1.9869e-04
Loss = 8.1315e-02, PNorm = 84.3734, GNorm = 0.6319, lr_0 = 1.9856e-04
Loss = 8.8183e-02, PNorm = 84.3755, GNorm = 0.6003, lr_0 = 1.9842e-04
Loss = 8.0168e-02, PNorm = 84.3769, GNorm = 0.5889, lr_0 = 1.9829e-04
Loss = 8.8789e-02, PNorm = 84.3788, GNorm = 0.7681, lr_0 = 1.9815e-04
Loss = 7.7044e-02, PNorm = 84.3811, GNorm = 0.6858, lr_0 = 1.9801e-04
Loss = 9.1614e-02, PNorm = 84.3866, GNorm = 0.5764, lr_0 = 1.9788e-04
Loss = 1.0236e-01, PNorm = 84.3934, GNorm = 0.7794, lr_0 = 1.9774e-04
Loss = 8.5262e-02, PNorm = 84.4015, GNorm = 0.6505, lr_0 = 1.9761e-04
Loss = 8.1595e-02, PNorm = 84.4056, GNorm = 0.6032, lr_0 = 1.9747e-04
Loss = 8.5457e-02, PNorm = 84.4092, GNorm = 0.9364, lr_0 = 1.9734e-04
Loss = 8.9378e-02, PNorm = 84.4115, GNorm = 0.6029, lr_0 = 1.9720e-04
Loss = 8.4922e-02, PNorm = 84.4146, GNorm = 0.6639, lr_0 = 1.9707e-04
Loss = 9.8531e-02, PNorm = 84.4190, GNorm = 0.6646, lr_0 = 1.9693e-04
Loss = 9.4662e-02, PNorm = 84.4218, GNorm = 0.6770, lr_0 = 1.9680e-04
Loss = 8.3197e-02, PNorm = 84.4263, GNorm = 0.8075, lr_0 = 1.9666e-04
Loss = 9.6961e-02, PNorm = 84.4279, GNorm = 0.6217, lr_0 = 1.9653e-04
Loss = 9.9263e-02, PNorm = 84.4312, GNorm = 0.5316, lr_0 = 1.9639e-04
Loss = 7.8976e-02, PNorm = 84.4357, GNorm = 0.5033, lr_0 = 1.9626e-04
Loss = 8.7623e-02, PNorm = 84.4398, GNorm = 0.8417, lr_0 = 1.9612e-04
Loss = 9.3207e-02, PNorm = 84.4450, GNorm = 0.7446, lr_0 = 1.9599e-04
Loss = 8.8566e-02, PNorm = 84.4501, GNorm = 0.8194, lr_0 = 1.9585e-04
Loss = 9.4401e-02, PNorm = 84.4522, GNorm = 1.0979, lr_0 = 1.9572e-04
Loss = 9.5571e-02, PNorm = 84.4593, GNorm = 0.6923, lr_0 = 1.9559e-04
Loss = 1.0221e-01, PNorm = 84.4630, GNorm = 0.6748, lr_0 = 1.9545e-04
Loss = 8.7262e-02, PNorm = 84.4632, GNorm = 0.7593, lr_0 = 1.9532e-04
Loss = 9.1354e-02, PNorm = 84.4659, GNorm = 0.6471, lr_0 = 1.9518e-04
Loss = 9.2401e-02, PNorm = 84.4667, GNorm = 0.6765, lr_0 = 1.9505e-04
Loss = 9.2081e-02, PNorm = 84.4714, GNorm = 0.6399, lr_0 = 1.9492e-04
Loss = 8.4900e-02, PNorm = 84.4780, GNorm = 0.5034, lr_0 = 1.9478e-04
Loss = 9.1177e-02, PNorm = 84.4808, GNorm = 0.6440, lr_0 = 1.9465e-04
Loss = 9.4434e-02, PNorm = 84.4830, GNorm = 0.6311, lr_0 = 1.9452e-04
Loss = 8.8407e-02, PNorm = 84.4854, GNorm = 0.7175, lr_0 = 1.9438e-04
Loss = 8.1980e-02, PNorm = 84.4860, GNorm = 0.4680, lr_0 = 1.9425e-04
Loss = 8.5767e-02, PNorm = 84.4890, GNorm = 0.6251, lr_0 = 1.9412e-04
Loss = 8.4961e-02, PNorm = 84.4929, GNorm = 0.5504, lr_0 = 1.9398e-04
Loss = 9.9969e-02, PNorm = 84.4963, GNorm = 0.8188, lr_0 = 1.9385e-04
Loss = 9.4498e-02, PNorm = 84.5000, GNorm = 0.6368, lr_0 = 1.9372e-04
Loss = 1.0534e-01, PNorm = 84.5031, GNorm = 0.8263, lr_0 = 1.9359e-04
Loss = 8.9054e-02, PNorm = 84.5064, GNorm = 0.6035, lr_0 = 1.9345e-04
Loss = 9.3543e-02, PNorm = 84.5102, GNorm = 0.9045, lr_0 = 1.9332e-04
Loss = 8.2788e-02, PNorm = 84.5132, GNorm = 0.7883, lr_0 = 1.9319e-04
Loss = 8.9450e-02, PNorm = 84.5143, GNorm = 0.5915, lr_0 = 1.9306e-04
Validation mae = 0.227169
Epoch 22
Loss = 8.5770e-02, PNorm = 84.5170, GNorm = 0.6620, lr_0 = 1.9292e-04
Loss = 7.9864e-02, PNorm = 84.5169, GNorm = 0.6496, lr_0 = 1.9279e-04
Loss = 9.0438e-02, PNorm = 84.5191, GNorm = 0.6383, lr_0 = 1.9266e-04
Loss = 8.1886e-02, PNorm = 84.5232, GNorm = 0.5011, lr_0 = 1.9253e-04
Loss = 7.7488e-02, PNorm = 84.5270, GNorm = 0.6303, lr_0 = 1.9240e-04
Loss = 8.7601e-02, PNorm = 84.5321, GNorm = 0.5354, lr_0 = 1.9226e-04
Loss = 8.2015e-02, PNorm = 84.5388, GNorm = 0.6544, lr_0 = 1.9213e-04
Loss = 8.9903e-02, PNorm = 84.5417, GNorm = 0.5704, lr_0 = 1.9200e-04
Loss = 8.2856e-02, PNorm = 84.5466, GNorm = 0.7715, lr_0 = 1.9187e-04
Loss = 8.8671e-02, PNorm = 84.5488, GNorm = 0.6276, lr_0 = 1.9174e-04
Loss = 7.8804e-02, PNorm = 84.5509, GNorm = 0.6160, lr_0 = 1.9161e-04
Loss = 8.4552e-02, PNorm = 84.5540, GNorm = 0.5923, lr_0 = 1.9148e-04
Loss = 7.2932e-02, PNorm = 84.5575, GNorm = 0.5468, lr_0 = 1.9134e-04
Loss = 1.0172e-01, PNorm = 84.5612, GNorm = 0.8561, lr_0 = 1.9121e-04
Loss = 7.1119e-02, PNorm = 84.5651, GNorm = 0.4884, lr_0 = 1.9108e-04
Loss = 8.6809e-02, PNorm = 84.5696, GNorm = 0.5903, lr_0 = 1.9095e-04
Loss = 9.1424e-02, PNorm = 84.5740, GNorm = 0.6066, lr_0 = 1.9082e-04
Loss = 8.7106e-02, PNorm = 84.5778, GNorm = 0.5406, lr_0 = 1.9069e-04
Loss = 7.4981e-02, PNorm = 84.5801, GNorm = 0.4530, lr_0 = 1.9056e-04
Loss = 8.8827e-02, PNorm = 84.5816, GNorm = 0.5564, lr_0 = 1.9043e-04
Loss = 8.4437e-02, PNorm = 84.5844, GNorm = 0.6387, lr_0 = 1.9030e-04
Loss = 9.1051e-02, PNorm = 84.5891, GNorm = 0.5931, lr_0 = 1.9017e-04
Loss = 8.3430e-02, PNorm = 84.5942, GNorm = 0.6298, lr_0 = 1.9004e-04
Loss = 7.7276e-02, PNorm = 84.5982, GNorm = 0.6935, lr_0 = 1.8991e-04
Loss = 9.9788e-02, PNorm = 84.6017, GNorm = 0.7673, lr_0 = 1.8978e-04
Loss = 8.1868e-02, PNorm = 84.6043, GNorm = 0.5661, lr_0 = 1.8965e-04
Loss = 8.2694e-02, PNorm = 84.6083, GNorm = 0.5865, lr_0 = 1.8952e-04
Loss = 7.9141e-02, PNorm = 84.6132, GNorm = 0.7759, lr_0 = 1.8939e-04
Loss = 8.8068e-02, PNorm = 84.6164, GNorm = 1.1074, lr_0 = 1.8926e-04
Loss = 8.9520e-02, PNorm = 84.6206, GNorm = 1.0452, lr_0 = 1.8913e-04
Loss = 9.7542e-02, PNorm = 84.6248, GNorm = 0.6377, lr_0 = 1.8900e-04
Loss = 1.0196e-01, PNorm = 84.6272, GNorm = 0.5339, lr_0 = 1.8887e-04
Loss = 8.9882e-02, PNorm = 84.6298, GNorm = 0.7884, lr_0 = 1.8874e-04
Loss = 8.5270e-02, PNorm = 84.6314, GNorm = 0.6181, lr_0 = 1.8861e-04
Loss = 8.5286e-02, PNorm = 84.6314, GNorm = 0.5558, lr_0 = 1.8848e-04
Loss = 8.0281e-02, PNorm = 84.6364, GNorm = 0.8045, lr_0 = 1.8835e-04
Loss = 9.3927e-02, PNorm = 84.6408, GNorm = 0.5632, lr_0 = 1.8822e-04
Loss = 8.8154e-02, PNorm = 84.6440, GNorm = 0.7611, lr_0 = 1.8809e-04
Loss = 9.4182e-02, PNorm = 84.6487, GNorm = 0.5577, lr_0 = 1.8797e-04
Loss = 8.5895e-02, PNorm = 84.6524, GNorm = 0.7908, lr_0 = 1.8784e-04
Loss = 7.9147e-02, PNorm = 84.6545, GNorm = 0.5636, lr_0 = 1.8771e-04
Loss = 8.8903e-02, PNorm = 84.6564, GNorm = 0.5857, lr_0 = 1.8758e-04
Loss = 7.8671e-02, PNorm = 84.6592, GNorm = 0.6360, lr_0 = 1.8745e-04
Loss = 8.2376e-02, PNorm = 84.6615, GNorm = 0.7068, lr_0 = 1.8732e-04
Loss = 8.2681e-02, PNorm = 84.6629, GNorm = 0.5708, lr_0 = 1.8719e-04
Loss = 8.6588e-02, PNorm = 84.6658, GNorm = 0.6910, lr_0 = 1.8707e-04
Loss = 9.0329e-02, PNorm = 84.6686, GNorm = 0.7053, lr_0 = 1.8694e-04
Loss = 9.1435e-02, PNorm = 84.6724, GNorm = 0.5009, lr_0 = 1.8681e-04
Loss = 8.4112e-02, PNorm = 84.6767, GNorm = 0.7548, lr_0 = 1.8668e-04
Loss = 9.3007e-02, PNorm = 84.6800, GNorm = 0.8959, lr_0 = 1.8655e-04
Loss = 7.7588e-02, PNorm = 84.6868, GNorm = 0.6296, lr_0 = 1.8643e-04
Loss = 7.9718e-02, PNorm = 84.6910, GNorm = 1.0792, lr_0 = 1.8630e-04
Loss = 9.0523e-02, PNorm = 84.6953, GNorm = 0.5667, lr_0 = 1.8617e-04
Loss = 9.0079e-02, PNorm = 84.6980, GNorm = 0.6393, lr_0 = 1.8604e-04
Loss = 7.9952e-02, PNorm = 84.6976, GNorm = 0.5803, lr_0 = 1.8592e-04
Loss = 7.4219e-02, PNorm = 84.6984, GNorm = 0.4705, lr_0 = 1.8579e-04
Loss = 8.8555e-02, PNorm = 84.6978, GNorm = 0.4877, lr_0 = 1.8566e-04
Loss = 8.5123e-02, PNorm = 84.7003, GNorm = 0.6643, lr_0 = 1.8553e-04
Loss = 7.3828e-02, PNorm = 84.7040, GNorm = 0.7333, lr_0 = 1.8541e-04
Loss = 8.2493e-02, PNorm = 84.7064, GNorm = 0.6958, lr_0 = 1.8528e-04
Loss = 1.0127e-01, PNorm = 84.7120, GNorm = 0.6348, lr_0 = 1.8515e-04
Loss = 8.5093e-02, PNorm = 84.7162, GNorm = 0.7661, lr_0 = 1.8503e-04
Loss = 8.3801e-02, PNorm = 84.7232, GNorm = 0.9335, lr_0 = 1.8490e-04
Loss = 8.9312e-02, PNorm = 84.7279, GNorm = 0.6044, lr_0 = 1.8477e-04
Loss = 8.1905e-02, PNorm = 84.7288, GNorm = 0.7371, lr_0 = 1.8465e-04
Loss = 9.0169e-02, PNorm = 84.7327, GNorm = 0.6195, lr_0 = 1.8452e-04
Loss = 8.6505e-02, PNorm = 84.7376, GNorm = 0.7302, lr_0 = 1.8439e-04
Loss = 1.0001e-01, PNorm = 84.7416, GNorm = 0.5675, lr_0 = 1.8427e-04
Loss = 7.8335e-02, PNorm = 84.7454, GNorm = 0.5744, lr_0 = 1.8414e-04
Loss = 8.5866e-02, PNorm = 84.7488, GNorm = 0.6277, lr_0 = 1.8401e-04
Loss = 8.9062e-02, PNorm = 84.7514, GNorm = 0.6319, lr_0 = 1.8389e-04
Loss = 8.5974e-02, PNorm = 84.7520, GNorm = 0.4872, lr_0 = 1.8376e-04
Loss = 8.9359e-02, PNorm = 84.7547, GNorm = 0.8053, lr_0 = 1.8364e-04
Loss = 1.0688e-01, PNorm = 84.7581, GNorm = 0.6096, lr_0 = 1.8351e-04
Loss = 8.3226e-02, PNorm = 84.7606, GNorm = 0.5968, lr_0 = 1.8338e-04
Loss = 8.4817e-02, PNorm = 84.7657, GNorm = 0.6165, lr_0 = 1.8326e-04
Loss = 8.6168e-02, PNorm = 84.7682, GNorm = 0.6393, lr_0 = 1.8313e-04
Loss = 8.6345e-02, PNorm = 84.7712, GNorm = 0.7391, lr_0 = 1.8301e-04
Loss = 9.3075e-02, PNorm = 84.7741, GNorm = 0.5484, lr_0 = 1.8288e-04
Loss = 8.4910e-02, PNorm = 84.7778, GNorm = 0.7266, lr_0 = 1.8276e-04
Loss = 8.2482e-02, PNorm = 84.7787, GNorm = 0.5378, lr_0 = 1.8263e-04
Loss = 8.1230e-02, PNorm = 84.7828, GNorm = 0.7342, lr_0 = 1.8251e-04
Loss = 9.5774e-02, PNorm = 84.7845, GNorm = 0.5772, lr_0 = 1.8238e-04
Loss = 8.5703e-02, PNorm = 84.7866, GNorm = 0.6942, lr_0 = 1.8226e-04
Loss = 8.0250e-02, PNorm = 84.7886, GNorm = 0.7402, lr_0 = 1.8213e-04
Loss = 8.0655e-02, PNorm = 84.7954, GNorm = 0.6239, lr_0 = 1.8201e-04
Loss = 9.6708e-02, PNorm = 84.7991, GNorm = 0.5867, lr_0 = 1.8188e-04
Loss = 9.3004e-02, PNorm = 84.8004, GNorm = 0.8238, lr_0 = 1.8176e-04
Loss = 7.8953e-02, PNorm = 84.8033, GNorm = 0.4630, lr_0 = 1.8163e-04
Loss = 7.6611e-02, PNorm = 84.8065, GNorm = 0.5933, lr_0 = 1.8151e-04
Loss = 9.8477e-02, PNorm = 84.8103, GNorm = 0.7055, lr_0 = 1.8138e-04
Loss = 8.4112e-02, PNorm = 84.8137, GNorm = 0.6999, lr_0 = 1.8126e-04
Loss = 9.3133e-02, PNorm = 84.8137, GNorm = 0.7018, lr_0 = 1.8114e-04
Loss = 9.2041e-02, PNorm = 84.8169, GNorm = 0.7381, lr_0 = 1.8101e-04
Loss = 8.0945e-02, PNorm = 84.8189, GNorm = 0.7953, lr_0 = 1.8089e-04
Loss = 8.9240e-02, PNorm = 84.8204, GNorm = 0.5532, lr_0 = 1.8076e-04
Loss = 8.1073e-02, PNorm = 84.8232, GNorm = 0.5823, lr_0 = 1.8064e-04
Loss = 7.6825e-02, PNorm = 84.8265, GNorm = 0.9036, lr_0 = 1.8052e-04
Loss = 7.4561e-02, PNorm = 84.8295, GNorm = 0.5690, lr_0 = 1.8039e-04
Loss = 8.1691e-02, PNorm = 84.8339, GNorm = 0.5510, lr_0 = 1.8027e-04
Loss = 8.5776e-02, PNorm = 84.8404, GNorm = 0.8565, lr_0 = 1.8015e-04
Loss = 8.5093e-02, PNorm = 84.8434, GNorm = 0.4644, lr_0 = 1.8002e-04
Loss = 9.3588e-02, PNorm = 84.8462, GNorm = 0.8797, lr_0 = 1.7990e-04
Loss = 8.6778e-02, PNorm = 84.8472, GNorm = 0.5204, lr_0 = 1.7978e-04
Loss = 9.7236e-02, PNorm = 84.8466, GNorm = 0.7553, lr_0 = 1.7965e-04
Loss = 8.6263e-02, PNorm = 84.8497, GNorm = 0.5400, lr_0 = 1.7953e-04
Loss = 8.2940e-02, PNorm = 84.8538, GNorm = 0.4711, lr_0 = 1.7941e-04
Loss = 7.9681e-02, PNorm = 84.8588, GNorm = 0.7000, lr_0 = 1.7928e-04
Loss = 9.6079e-02, PNorm = 84.8651, GNorm = 0.7949, lr_0 = 1.7916e-04
Loss = 9.1414e-02, PNorm = 84.8712, GNorm = 0.4675, lr_0 = 1.7904e-04
Loss = 8.4830e-02, PNorm = 84.8735, GNorm = 0.5978, lr_0 = 1.7892e-04
Loss = 9.0860e-02, PNorm = 84.8756, GNorm = 0.5942, lr_0 = 1.7879e-04
Loss = 9.4913e-02, PNorm = 84.8797, GNorm = 0.6812, lr_0 = 1.7867e-04
Loss = 9.3721e-02, PNorm = 84.8803, GNorm = 0.6129, lr_0 = 1.7855e-04
Loss = 9.3414e-02, PNorm = 84.8835, GNorm = 1.1023, lr_0 = 1.7843e-04
Loss = 9.0827e-02, PNorm = 84.8873, GNorm = 0.6622, lr_0 = 1.7830e-04
Loss = 9.0206e-02, PNorm = 84.8906, GNorm = 0.6283, lr_0 = 1.7818e-04
Loss = 7.5766e-02, PNorm = 84.8935, GNorm = 0.5044, lr_0 = 1.7806e-04
Loss = 8.8058e-02, PNorm = 84.8970, GNorm = 0.6644, lr_0 = 1.7794e-04
Loss = 8.6494e-02, PNorm = 84.9006, GNorm = 0.5338, lr_0 = 1.7782e-04
Validation mae = 0.229151
Epoch 23
Loss = 9.3602e-02, PNorm = 84.9055, GNorm = 0.6659, lr_0 = 1.7769e-04
Loss = 8.4566e-02, PNorm = 84.9090, GNorm = 0.6471, lr_0 = 1.7757e-04
Loss = 8.2121e-02, PNorm = 84.9121, GNorm = 0.6326, lr_0 = 1.7745e-04
Loss = 7.2914e-02, PNorm = 84.9126, GNorm = 0.5722, lr_0 = 1.7733e-04
Loss = 7.9190e-02, PNorm = 84.9155, GNorm = 0.5568, lr_0 = 1.7721e-04
Loss = 7.1879e-02, PNorm = 84.9181, GNorm = 0.5405, lr_0 = 1.7709e-04
Loss = 8.1610e-02, PNorm = 84.9229, GNorm = 0.7102, lr_0 = 1.7696e-04
Loss = 7.7255e-02, PNorm = 84.9259, GNorm = 0.7230, lr_0 = 1.7684e-04
Loss = 7.9743e-02, PNorm = 84.9272, GNorm = 0.6568, lr_0 = 1.7672e-04
Loss = 7.9318e-02, PNorm = 84.9304, GNorm = 0.5794, lr_0 = 1.7660e-04
Loss = 6.9605e-02, PNorm = 84.9339, GNorm = 0.5016, lr_0 = 1.7648e-04
Loss = 7.9750e-02, PNorm = 84.9346, GNorm = 0.6773, lr_0 = 1.7636e-04
Loss = 9.0414e-02, PNorm = 84.9370, GNorm = 0.7483, lr_0 = 1.7624e-04
Loss = 8.4473e-02, PNorm = 84.9415, GNorm = 0.7353, lr_0 = 1.7612e-04
Loss = 9.7644e-02, PNorm = 84.9453, GNorm = 0.7353, lr_0 = 1.7600e-04
Loss = 8.9581e-02, PNorm = 84.9486, GNorm = 0.6576, lr_0 = 1.7588e-04
Loss = 8.2982e-02, PNorm = 84.9531, GNorm = 0.5287, lr_0 = 1.7576e-04
Loss = 9.2868e-02, PNorm = 84.9565, GNorm = 1.1200, lr_0 = 1.7564e-04
Loss = 8.3360e-02, PNorm = 84.9590, GNorm = 0.7746, lr_0 = 1.7552e-04
Loss = 8.8956e-02, PNorm = 84.9635, GNorm = 0.7614, lr_0 = 1.7540e-04
Loss = 7.6438e-02, PNorm = 84.9647, GNorm = 0.6726, lr_0 = 1.7528e-04
Loss = 8.1327e-02, PNorm = 84.9655, GNorm = 0.5997, lr_0 = 1.7516e-04
Loss = 7.3440e-02, PNorm = 84.9697, GNorm = 0.7074, lr_0 = 1.7504e-04
Loss = 8.5807e-02, PNorm = 84.9733, GNorm = 0.7062, lr_0 = 1.7492e-04
Loss = 9.0247e-02, PNorm = 84.9782, GNorm = 0.6740, lr_0 = 1.7480e-04
Loss = 7.9929e-02, PNorm = 84.9826, GNorm = 0.5985, lr_0 = 1.7468e-04
Loss = 8.7667e-02, PNorm = 84.9885, GNorm = 0.5616, lr_0 = 1.7456e-04
Loss = 7.9753e-02, PNorm = 84.9930, GNorm = 0.4421, lr_0 = 1.7444e-04
Loss = 8.5880e-02, PNorm = 84.9965, GNorm = 0.6144, lr_0 = 1.7432e-04
Loss = 9.7663e-02, PNorm = 84.9987, GNorm = 0.5667, lr_0 = 1.7420e-04
Loss = 8.5928e-02, PNorm = 85.0007, GNorm = 0.6870, lr_0 = 1.7408e-04
Loss = 9.1251e-02, PNorm = 85.0043, GNorm = 0.7840, lr_0 = 1.7396e-04
Loss = 7.8703e-02, PNorm = 85.0064, GNorm = 0.5628, lr_0 = 1.7384e-04
Loss = 9.0208e-02, PNorm = 85.0085, GNorm = 0.6817, lr_0 = 1.7372e-04
Loss = 8.0868e-02, PNorm = 85.0122, GNorm = 0.6994, lr_0 = 1.7360e-04
Loss = 8.9004e-02, PNorm = 85.0166, GNorm = 0.5585, lr_0 = 1.7348e-04
Loss = 8.5776e-02, PNorm = 85.0192, GNorm = 0.5329, lr_0 = 1.7336e-04
Loss = 8.4233e-02, PNorm = 85.0214, GNorm = 0.6951, lr_0 = 1.7325e-04
Loss = 7.6471e-02, PNorm = 85.0230, GNorm = 0.7003, lr_0 = 1.7313e-04
Loss = 8.5200e-02, PNorm = 85.0267, GNorm = 0.6223, lr_0 = 1.7301e-04
Loss = 8.8153e-02, PNorm = 85.0305, GNorm = 0.6333, lr_0 = 1.7289e-04
Loss = 7.1371e-02, PNorm = 85.0324, GNorm = 0.5123, lr_0 = 1.7277e-04
Loss = 7.9487e-02, PNorm = 85.0334, GNorm = 0.5474, lr_0 = 1.7265e-04
Loss = 9.4436e-02, PNorm = 85.0354, GNorm = 0.7399, lr_0 = 1.7253e-04
Loss = 7.6405e-02, PNorm = 85.0379, GNorm = 0.7747, lr_0 = 1.7242e-04
Loss = 9.1713e-02, PNorm = 85.0409, GNorm = 0.8781, lr_0 = 1.7230e-04
Loss = 8.1381e-02, PNorm = 85.0435, GNorm = 0.6873, lr_0 = 1.7218e-04
Loss = 9.6082e-02, PNorm = 85.0460, GNorm = 0.5839, lr_0 = 1.7206e-04
Loss = 9.0261e-02, PNorm = 85.0487, GNorm = 0.7226, lr_0 = 1.7194e-04
Loss = 8.2226e-02, PNorm = 85.0529, GNorm = 0.5788, lr_0 = 1.7183e-04
Loss = 8.3877e-02, PNorm = 85.0566, GNorm = 0.5190, lr_0 = 1.7171e-04
Loss = 7.6613e-02, PNorm = 85.0596, GNorm = 0.5493, lr_0 = 1.7159e-04
Loss = 8.1827e-02, PNorm = 85.0634, GNorm = 0.5069, lr_0 = 1.7147e-04
Loss = 8.5353e-02, PNorm = 85.0635, GNorm = 0.5943, lr_0 = 1.7136e-04
Loss = 8.4129e-02, PNorm = 85.0661, GNorm = 0.9334, lr_0 = 1.7124e-04
Loss = 7.4975e-02, PNorm = 85.0693, GNorm = 0.6207, lr_0 = 1.7112e-04
Loss = 7.1837e-02, PNorm = 85.0723, GNorm = 0.6092, lr_0 = 1.7100e-04
Loss = 8.4756e-02, PNorm = 85.0738, GNorm = 0.5874, lr_0 = 1.7089e-04
Loss = 9.1280e-02, PNorm = 85.0750, GNorm = 0.7079, lr_0 = 1.7077e-04
Loss = 9.3144e-02, PNorm = 85.0766, GNorm = 0.5958, lr_0 = 1.7065e-04
Loss = 9.7006e-02, PNorm = 85.0781, GNorm = 0.8108, lr_0 = 1.7054e-04
Loss = 8.7254e-02, PNorm = 85.0779, GNorm = 0.5792, lr_0 = 1.7042e-04
Loss = 7.6582e-02, PNorm = 85.0812, GNorm = 0.5133, lr_0 = 1.7030e-04
Loss = 9.7905e-02, PNorm = 85.0839, GNorm = 1.1427, lr_0 = 1.7019e-04
Loss = 8.4978e-02, PNorm = 85.0843, GNorm = 0.5767, lr_0 = 1.7007e-04
Loss = 8.8782e-02, PNorm = 85.0872, GNorm = 0.6476, lr_0 = 1.6995e-04
Loss = 8.0457e-02, PNorm = 85.0890, GNorm = 0.5761, lr_0 = 1.6984e-04
Loss = 8.4389e-02, PNorm = 85.0924, GNorm = 0.6343, lr_0 = 1.6972e-04
Loss = 7.7644e-02, PNorm = 85.0944, GNorm = 0.6302, lr_0 = 1.6960e-04
Loss = 8.5267e-02, PNorm = 85.0989, GNorm = 0.4083, lr_0 = 1.6949e-04
Loss = 7.6891e-02, PNorm = 85.1008, GNorm = 0.6552, lr_0 = 1.6937e-04
Loss = 8.0461e-02, PNorm = 85.1030, GNorm = 0.8561, lr_0 = 1.6926e-04
Loss = 8.4248e-02, PNorm = 85.1049, GNorm = 0.6100, lr_0 = 1.6914e-04
Loss = 9.6059e-02, PNorm = 85.1088, GNorm = 0.5570, lr_0 = 1.6902e-04
Loss = 8.7502e-02, PNorm = 85.1134, GNorm = 0.6823, lr_0 = 1.6891e-04
Loss = 8.3481e-02, PNorm = 85.1147, GNorm = 0.5797, lr_0 = 1.6879e-04
Loss = 8.7042e-02, PNorm = 85.1183, GNorm = 0.5612, lr_0 = 1.6868e-04
Loss = 8.9922e-02, PNorm = 85.1193, GNorm = 0.8311, lr_0 = 1.6856e-04
Loss = 9.1948e-02, PNorm = 85.1216, GNorm = 0.6910, lr_0 = 1.6845e-04
Loss = 9.0558e-02, PNorm = 85.1240, GNorm = 0.7611, lr_0 = 1.6833e-04
Loss = 7.2442e-02, PNorm = 85.1266, GNorm = 0.7593, lr_0 = 1.6821e-04
Loss = 9.1923e-02, PNorm = 85.1280, GNorm = 0.6002, lr_0 = 1.6810e-04
Loss = 7.6459e-02, PNorm = 85.1316, GNorm = 0.6781, lr_0 = 1.6798e-04
Loss = 7.3204e-02, PNorm = 85.1354, GNorm = 0.6162, lr_0 = 1.6787e-04
Loss = 8.0550e-02, PNorm = 85.1380, GNorm = 0.7014, lr_0 = 1.6775e-04
Loss = 7.9553e-02, PNorm = 85.1397, GNorm = 0.5934, lr_0 = 1.6764e-04
Loss = 8.3814e-02, PNorm = 85.1413, GNorm = 0.5705, lr_0 = 1.6752e-04
Loss = 8.6691e-02, PNorm = 85.1461, GNorm = 0.6482, lr_0 = 1.6741e-04
Loss = 8.0002e-02, PNorm = 85.1505, GNorm = 0.7806, lr_0 = 1.6729e-04
Loss = 8.9978e-02, PNorm = 85.1553, GNorm = 0.5789, lr_0 = 1.6718e-04
Loss = 8.3607e-02, PNorm = 85.1566, GNorm = 0.7335, lr_0 = 1.6707e-04
Loss = 7.7899e-02, PNorm = 85.1578, GNorm = 0.5113, lr_0 = 1.6695e-04
Loss = 1.0014e-01, PNorm = 85.1592, GNorm = 0.8008, lr_0 = 1.6684e-04
Loss = 9.2702e-02, PNorm = 85.1635, GNorm = 0.7646, lr_0 = 1.6672e-04
Loss = 9.3715e-02, PNorm = 85.1670, GNorm = 0.8558, lr_0 = 1.6661e-04
Loss = 7.8637e-02, PNorm = 85.1699, GNorm = 0.5180, lr_0 = 1.6649e-04
Loss = 7.6606e-02, PNorm = 85.1744, GNorm = 0.4933, lr_0 = 1.6638e-04
Loss = 8.2120e-02, PNorm = 85.1774, GNorm = 0.5739, lr_0 = 1.6627e-04
Loss = 9.4596e-02, PNorm = 85.1794, GNorm = 0.6582, lr_0 = 1.6615e-04
Loss = 7.6306e-02, PNorm = 85.1813, GNorm = 0.4217, lr_0 = 1.6604e-04
Loss = 8.0697e-02, PNorm = 85.1825, GNorm = 0.6297, lr_0 = 1.6592e-04
Loss = 7.7037e-02, PNorm = 85.1847, GNorm = 0.6686, lr_0 = 1.6581e-04
Loss = 7.3492e-02, PNorm = 85.1858, GNorm = 0.5130, lr_0 = 1.6570e-04
Loss = 7.7209e-02, PNorm = 85.1887, GNorm = 0.5938, lr_0 = 1.6558e-04
Loss = 7.4916e-02, PNorm = 85.1899, GNorm = 0.6573, lr_0 = 1.6547e-04
Loss = 8.5210e-02, PNorm = 85.1900, GNorm = 0.6003, lr_0 = 1.6536e-04
Loss = 8.4292e-02, PNorm = 85.1912, GNorm = 0.8507, lr_0 = 1.6524e-04
Loss = 8.7124e-02, PNorm = 85.1950, GNorm = 0.9460, lr_0 = 1.6513e-04
Loss = 8.2001e-02, PNorm = 85.1981, GNorm = 0.6160, lr_0 = 1.6502e-04
Loss = 9.1459e-02, PNorm = 85.2001, GNorm = 0.7001, lr_0 = 1.6490e-04
Loss = 8.3024e-02, PNorm = 85.2021, GNorm = 0.5587, lr_0 = 1.6479e-04
Loss = 7.9145e-02, PNorm = 85.2060, GNorm = 0.5264, lr_0 = 1.6468e-04
Loss = 8.5750e-02, PNorm = 85.2099, GNorm = 0.5256, lr_0 = 1.6457e-04
Loss = 8.5594e-02, PNorm = 85.2118, GNorm = 0.7270, lr_0 = 1.6445e-04
Loss = 8.9117e-02, PNorm = 85.2141, GNorm = 0.6775, lr_0 = 1.6434e-04
Loss = 9.0525e-02, PNorm = 85.2163, GNorm = 0.5654, lr_0 = 1.6423e-04
Loss = 8.5913e-02, PNorm = 85.2175, GNorm = 0.8316, lr_0 = 1.6412e-04
Loss = 8.2348e-02, PNorm = 85.2201, GNorm = 0.6875, lr_0 = 1.6400e-04
Loss = 8.5758e-02, PNorm = 85.2235, GNorm = 0.5654, lr_0 = 1.6389e-04
Loss = 8.3747e-02, PNorm = 85.2243, GNorm = 0.5735, lr_0 = 1.6378e-04
Validation mae = 0.226745
Epoch 24
Loss = 8.5743e-02, PNorm = 85.2261, GNorm = 0.7996, lr_0 = 1.6367e-04
Loss = 7.6896e-02, PNorm = 85.2277, GNorm = 0.7668, lr_0 = 1.6355e-04
Loss = 7.4566e-02, PNorm = 85.2288, GNorm = 0.5257, lr_0 = 1.6344e-04
Loss = 8.0781e-02, PNorm = 85.2315, GNorm = 0.6588, lr_0 = 1.6333e-04
Loss = 7.2569e-02, PNorm = 85.2351, GNorm = 0.4906, lr_0 = 1.6322e-04
Loss = 8.0683e-02, PNorm = 85.2397, GNorm = 0.5131, lr_0 = 1.6311e-04
Loss = 8.4214e-02, PNorm = 85.2440, GNorm = 0.4450, lr_0 = 1.6299e-04
Loss = 7.4134e-02, PNorm = 85.2457, GNorm = 0.7311, lr_0 = 1.6288e-04
Loss = 8.9704e-02, PNorm = 85.2472, GNorm = 0.6582, lr_0 = 1.6277e-04
Loss = 7.6040e-02, PNorm = 85.2496, GNorm = 0.4711, lr_0 = 1.6266e-04
Loss = 9.4000e-02, PNorm = 85.2545, GNorm = 1.0847, lr_0 = 1.6255e-04
Loss = 8.8055e-02, PNorm = 85.2590, GNorm = 0.5099, lr_0 = 1.6244e-04
Loss = 7.7262e-02, PNorm = 85.2641, GNorm = 0.6846, lr_0 = 1.6233e-04
Loss = 8.2114e-02, PNorm = 85.2683, GNorm = 0.7066, lr_0 = 1.6221e-04
Loss = 8.0068e-02, PNorm = 85.2721, GNorm = 0.6899, lr_0 = 1.6210e-04
Loss = 9.5100e-02, PNorm = 85.2743, GNorm = 0.8808, lr_0 = 1.6199e-04
Loss = 7.8375e-02, PNorm = 85.2763, GNorm = 0.8382, lr_0 = 1.6188e-04
Loss = 7.2518e-02, PNorm = 85.2780, GNorm = 0.5956, lr_0 = 1.6177e-04
Loss = 8.8334e-02, PNorm = 85.2799, GNorm = 0.5223, lr_0 = 1.6166e-04
Loss = 8.2035e-02, PNorm = 85.2835, GNorm = 0.4985, lr_0 = 1.6155e-04
Loss = 8.4144e-02, PNorm = 85.2843, GNorm = 0.7925, lr_0 = 1.6144e-04
Loss = 8.1685e-02, PNorm = 85.2860, GNorm = 0.7231, lr_0 = 1.6133e-04
Loss = 8.2266e-02, PNorm = 85.2883, GNorm = 0.5312, lr_0 = 1.6122e-04
Loss = 8.8534e-02, PNorm = 85.2921, GNorm = 0.6754, lr_0 = 1.6111e-04
Loss = 7.9341e-02, PNorm = 85.2963, GNorm = 0.7409, lr_0 = 1.6100e-04
Loss = 9.9002e-02, PNorm = 85.3005, GNorm = 0.6962, lr_0 = 1.6089e-04
Loss = 8.4554e-02, PNorm = 85.3040, GNorm = 0.5397, lr_0 = 1.6078e-04
Loss = 7.7408e-02, PNorm = 85.3061, GNorm = 0.5429, lr_0 = 1.6067e-04
Loss = 8.1400e-02, PNorm = 85.3080, GNorm = 0.5427, lr_0 = 1.6056e-04
Loss = 7.3639e-02, PNorm = 85.3106, GNorm = 0.5203, lr_0 = 1.6045e-04
Loss = 7.6103e-02, PNorm = 85.3126, GNorm = 0.5188, lr_0 = 1.6034e-04
Loss = 8.5908e-02, PNorm = 85.3161, GNorm = 0.5279, lr_0 = 1.6023e-04
Loss = 7.5423e-02, PNorm = 85.3187, GNorm = 0.6282, lr_0 = 1.6012e-04
Loss = 7.7651e-02, PNorm = 85.3213, GNorm = 0.4801, lr_0 = 1.6001e-04
Loss = 7.2793e-02, PNorm = 85.3252, GNorm = 0.5776, lr_0 = 1.5990e-04
Loss = 8.4789e-02, PNorm = 85.3257, GNorm = 0.7901, lr_0 = 1.5979e-04
Loss = 8.2202e-02, PNorm = 85.3279, GNorm = 0.6892, lr_0 = 1.5968e-04
Loss = 8.0614e-02, PNorm = 85.3308, GNorm = 0.6234, lr_0 = 1.5957e-04
Loss = 7.3991e-02, PNorm = 85.3315, GNorm = 0.6667, lr_0 = 1.5946e-04
Loss = 8.1751e-02, PNorm = 85.3340, GNorm = 0.5578, lr_0 = 1.5935e-04
Loss = 8.2514e-02, PNorm = 85.3347, GNorm = 0.6792, lr_0 = 1.5924e-04
Loss = 8.6648e-02, PNorm = 85.3370, GNorm = 0.5818, lr_0 = 1.5913e-04
Loss = 8.5261e-02, PNorm = 85.3403, GNorm = 0.6669, lr_0 = 1.5902e-04
Loss = 8.0502e-02, PNorm = 85.3431, GNorm = 0.6296, lr_0 = 1.5891e-04
Loss = 7.9768e-02, PNorm = 85.3451, GNorm = 0.7330, lr_0 = 1.5880e-04
Loss = 9.0702e-02, PNorm = 85.3471, GNorm = 0.9495, lr_0 = 1.5870e-04
Loss = 7.6551e-02, PNorm = 85.3477, GNorm = 0.6185, lr_0 = 1.5859e-04
Loss = 7.9193e-02, PNorm = 85.3479, GNorm = 0.4946, lr_0 = 1.5848e-04
Loss = 8.2328e-02, PNorm = 85.3511, GNorm = 0.7492, lr_0 = 1.5837e-04
Loss = 9.0919e-02, PNorm = 85.3523, GNorm = 1.3412, lr_0 = 1.5826e-04
Loss = 9.6190e-02, PNorm = 85.3535, GNorm = 1.1825, lr_0 = 1.5815e-04
Loss = 9.1313e-02, PNorm = 85.3580, GNorm = 0.6595, lr_0 = 1.5804e-04
Loss = 7.9139e-02, PNorm = 85.3604, GNorm = 0.6665, lr_0 = 1.5794e-04
Loss = 8.5858e-02, PNorm = 85.3640, GNorm = 0.6570, lr_0 = 1.5783e-04
Loss = 7.7727e-02, PNorm = 85.3678, GNorm = 0.6507, lr_0 = 1.5772e-04
Loss = 7.5067e-02, PNorm = 85.3718, GNorm = 0.5215, lr_0 = 1.5761e-04
Loss = 7.3124e-02, PNorm = 85.3751, GNorm = 0.6394, lr_0 = 1.5750e-04
Loss = 7.9498e-02, PNorm = 85.3777, GNorm = 0.6810, lr_0 = 1.5740e-04
Loss = 8.6785e-02, PNorm = 85.3809, GNorm = 0.7118, lr_0 = 1.5729e-04
Loss = 7.8553e-02, PNorm = 85.3824, GNorm = 0.4936, lr_0 = 1.5718e-04
Loss = 8.0998e-02, PNorm = 85.3844, GNorm = 0.4997, lr_0 = 1.5707e-04
Loss = 7.7769e-02, PNorm = 85.3859, GNorm = 0.6154, lr_0 = 1.5697e-04
Loss = 8.5595e-02, PNorm = 85.3885, GNorm = 0.6126, lr_0 = 1.5686e-04
Loss = 8.9272e-02, PNorm = 85.3922, GNorm = 0.6543, lr_0 = 1.5675e-04
Loss = 8.1176e-02, PNorm = 85.3949, GNorm = 0.7660, lr_0 = 1.5664e-04
Loss = 8.6722e-02, PNorm = 85.3953, GNorm = 0.5683, lr_0 = 1.5654e-04
Loss = 8.4586e-02, PNorm = 85.3974, GNorm = 0.5582, lr_0 = 1.5643e-04
Loss = 8.3744e-02, PNorm = 85.3989, GNorm = 0.4544, lr_0 = 1.5632e-04
Loss = 7.4184e-02, PNorm = 85.4028, GNorm = 0.5365, lr_0 = 1.5621e-04
Loss = 7.8879e-02, PNorm = 85.4070, GNorm = 0.5730, lr_0 = 1.5611e-04
Loss = 8.7230e-02, PNorm = 85.4095, GNorm = 0.7754, lr_0 = 1.5600e-04
Loss = 8.5264e-02, PNorm = 85.4115, GNorm = 0.6379, lr_0 = 1.5589e-04
Loss = 9.9724e-02, PNorm = 85.4130, GNorm = 0.8447, lr_0 = 1.5579e-04
Loss = 7.6493e-02, PNorm = 85.4176, GNorm = 0.6276, lr_0 = 1.5568e-04
Loss = 9.6440e-02, PNorm = 85.4204, GNorm = 0.6377, lr_0 = 1.5557e-04
Loss = 7.8034e-02, PNorm = 85.4229, GNorm = 0.6424, lr_0 = 1.5547e-04
Loss = 8.6273e-02, PNorm = 85.4238, GNorm = 1.1217, lr_0 = 1.5536e-04
Loss = 8.4260e-02, PNorm = 85.4245, GNorm = 0.5276, lr_0 = 1.5525e-04
Loss = 7.5789e-02, PNorm = 85.4273, GNorm = 0.5457, lr_0 = 1.5515e-04
Loss = 7.8628e-02, PNorm = 85.4284, GNorm = 0.5153, lr_0 = 1.5504e-04
Loss = 9.1260e-02, PNorm = 85.4290, GNorm = 0.5866, lr_0 = 1.5493e-04
Loss = 9.0183e-02, PNorm = 85.4312, GNorm = 0.5579, lr_0 = 1.5483e-04
Loss = 7.1645e-02, PNorm = 85.4328, GNorm = 0.7576, lr_0 = 1.5472e-04
Loss = 7.7567e-02, PNorm = 85.4338, GNorm = 0.7168, lr_0 = 1.5462e-04
Loss = 9.3955e-02, PNorm = 85.4349, GNorm = 0.5972, lr_0 = 1.5451e-04
Loss = 7.9780e-02, PNorm = 85.4374, GNorm = 0.6520, lr_0 = 1.5440e-04
Loss = 8.6245e-02, PNorm = 85.4421, GNorm = 0.7520, lr_0 = 1.5430e-04
Loss = 8.8257e-02, PNorm = 85.4467, GNorm = 0.8367, lr_0 = 1.5419e-04
Loss = 8.1069e-02, PNorm = 85.4472, GNorm = 0.5971, lr_0 = 1.5409e-04
Loss = 8.3187e-02, PNorm = 85.4479, GNorm = 0.7129, lr_0 = 1.5398e-04
Loss = 9.1941e-02, PNorm = 85.4511, GNorm = 0.7798, lr_0 = 1.5388e-04
Loss = 7.6579e-02, PNorm = 85.4543, GNorm = 0.6210, lr_0 = 1.5377e-04
Loss = 9.0828e-02, PNorm = 85.4552, GNorm = 0.5204, lr_0 = 1.5367e-04
Loss = 8.2944e-02, PNorm = 85.4581, GNorm = 0.8760, lr_0 = 1.5356e-04
Loss = 9.1819e-02, PNorm = 85.4613, GNorm = 0.7424, lr_0 = 1.5346e-04
Loss = 7.7979e-02, PNorm = 85.4650, GNorm = 0.8201, lr_0 = 1.5335e-04
Loss = 7.9108e-02, PNorm = 85.4656, GNorm = 0.4868, lr_0 = 1.5325e-04
Loss = 8.1513e-02, PNorm = 85.4673, GNorm = 0.6097, lr_0 = 1.5314e-04
Loss = 8.1320e-02, PNorm = 85.4686, GNorm = 0.4618, lr_0 = 1.5304e-04
Loss = 7.9238e-02, PNorm = 85.4710, GNorm = 0.5683, lr_0 = 1.5293e-04
Loss = 8.6842e-02, PNorm = 85.4739, GNorm = 0.5357, lr_0 = 1.5283e-04
Loss = 8.5938e-02, PNorm = 85.4767, GNorm = 0.6748, lr_0 = 1.5272e-04
Loss = 8.1190e-02, PNorm = 85.4807, GNorm = 0.6164, lr_0 = 1.5262e-04
Loss = 9.2813e-02, PNorm = 85.4834, GNorm = 0.7441, lr_0 = 1.5251e-04
Loss = 9.0798e-02, PNorm = 85.4863, GNorm = 0.5745, lr_0 = 1.5241e-04
Loss = 8.4242e-02, PNorm = 85.4864, GNorm = 0.5314, lr_0 = 1.5230e-04
Loss = 8.0880e-02, PNorm = 85.4896, GNorm = 0.6269, lr_0 = 1.5220e-04
Loss = 8.5656e-02, PNorm = 85.4908, GNorm = 0.7368, lr_0 = 1.5209e-04
Loss = 8.4464e-02, PNorm = 85.4948, GNorm = 0.6825, lr_0 = 1.5199e-04
Loss = 8.4734e-02, PNorm = 85.4962, GNorm = 0.6138, lr_0 = 1.5189e-04
Loss = 7.9960e-02, PNorm = 85.4960, GNorm = 0.6834, lr_0 = 1.5178e-04
Loss = 8.0259e-02, PNorm = 85.4974, GNorm = 0.5482, lr_0 = 1.5168e-04
Loss = 8.0654e-02, PNorm = 85.5015, GNorm = 1.0108, lr_0 = 1.5157e-04
Loss = 7.8836e-02, PNorm = 85.5035, GNorm = 0.6460, lr_0 = 1.5147e-04
Loss = 9.7400e-02, PNorm = 85.5056, GNorm = 0.8102, lr_0 = 1.5137e-04
Loss = 8.4710e-02, PNorm = 85.5078, GNorm = 0.6296, lr_0 = 1.5126e-04
Loss = 8.6503e-02, PNorm = 85.5104, GNorm = 0.6757, lr_0 = 1.5116e-04
Loss = 1.0043e-01, PNorm = 85.5140, GNorm = 0.9496, lr_0 = 1.5106e-04
Loss = 8.4836e-02, PNorm = 85.5167, GNorm = 0.9494, lr_0 = 1.5095e-04
Loss = 8.3034e-02, PNorm = 85.5192, GNorm = 0.5404, lr_0 = 1.5085e-04
Validation mae = 0.228149
Epoch 25
Loss = 7.2207e-02, PNorm = 85.5204, GNorm = 0.5374, lr_0 = 1.5075e-04
Loss = 8.8132e-02, PNorm = 85.5257, GNorm = 0.6564, lr_0 = 1.5064e-04
Loss = 7.8391e-02, PNorm = 85.5275, GNorm = 0.5281, lr_0 = 1.5054e-04
Loss = 7.5297e-02, PNorm = 85.5291, GNorm = 0.5268, lr_0 = 1.5044e-04
Loss = 6.6825e-02, PNorm = 85.5332, GNorm = 0.5430, lr_0 = 1.5033e-04
Loss = 6.7243e-02, PNorm = 85.5350, GNorm = 0.5761, lr_0 = 1.5023e-04
Loss = 8.2178e-02, PNorm = 85.5372, GNorm = 0.6049, lr_0 = 1.5013e-04
Loss = 8.2168e-02, PNorm = 85.5386, GNorm = 0.6195, lr_0 = 1.5002e-04
Loss = 8.2181e-02, PNorm = 85.5404, GNorm = 0.8373, lr_0 = 1.4992e-04
Loss = 8.2914e-02, PNorm = 85.5438, GNorm = 0.7434, lr_0 = 1.4982e-04
Loss = 7.1944e-02, PNorm = 85.5471, GNorm = 0.6783, lr_0 = 1.4972e-04
Loss = 7.3652e-02, PNorm = 85.5502, GNorm = 0.5756, lr_0 = 1.4961e-04
Loss = 8.3692e-02, PNorm = 85.5514, GNorm = 1.0030, lr_0 = 1.4951e-04
Loss = 8.2909e-02, PNorm = 85.5531, GNorm = 0.7529, lr_0 = 1.4941e-04
Loss = 7.7719e-02, PNorm = 85.5545, GNorm = 0.6789, lr_0 = 1.4931e-04
Loss = 8.0871e-02, PNorm = 85.5576, GNorm = 0.8485, lr_0 = 1.4920e-04
Loss = 8.0943e-02, PNorm = 85.5585, GNorm = 0.5352, lr_0 = 1.4910e-04
Loss = 8.1763e-02, PNorm = 85.5610, GNorm = 0.6962, lr_0 = 1.4900e-04
Loss = 7.3166e-02, PNorm = 85.5624, GNorm = 0.7934, lr_0 = 1.4890e-04
Loss = 8.5949e-02, PNorm = 85.5634, GNorm = 0.7417, lr_0 = 1.4880e-04
Loss = 7.5740e-02, PNorm = 85.5664, GNorm = 0.5872, lr_0 = 1.4869e-04
Loss = 8.1102e-02, PNorm = 85.5704, GNorm = 0.6637, lr_0 = 1.4859e-04
Loss = 9.2177e-02, PNorm = 85.5707, GNorm = 0.6165, lr_0 = 1.4849e-04
Loss = 7.2322e-02, PNorm = 85.5728, GNorm = 0.7794, lr_0 = 1.4839e-04
Loss = 8.1927e-02, PNorm = 85.5783, GNorm = 0.6953, lr_0 = 1.4829e-04
Loss = 7.9481e-02, PNorm = 85.5798, GNorm = 0.6018, lr_0 = 1.4818e-04
Loss = 8.2142e-02, PNorm = 85.5811, GNorm = 0.5929, lr_0 = 1.4808e-04
Loss = 7.1374e-02, PNorm = 85.5839, GNorm = 0.6034, lr_0 = 1.4798e-04
Loss = 7.4343e-02, PNorm = 85.5861, GNorm = 0.4120, lr_0 = 1.4788e-04
Loss = 7.3636e-02, PNorm = 85.5875, GNorm = 0.6818, lr_0 = 1.4778e-04
Loss = 7.2158e-02, PNorm = 85.5919, GNorm = 0.7794, lr_0 = 1.4768e-04
Loss = 8.0590e-02, PNorm = 85.5955, GNorm = 0.5705, lr_0 = 1.4758e-04
Loss = 8.1062e-02, PNorm = 85.5992, GNorm = 0.8142, lr_0 = 1.4748e-04
Loss = 7.9395e-02, PNorm = 85.6021, GNorm = 0.5977, lr_0 = 1.4737e-04
Loss = 7.5617e-02, PNorm = 85.6035, GNorm = 0.6488, lr_0 = 1.4727e-04
Loss = 8.3164e-02, PNorm = 85.6066, GNorm = 0.6975, lr_0 = 1.4717e-04
Loss = 8.3655e-02, PNorm = 85.6063, GNorm = 0.6097, lr_0 = 1.4707e-04
Loss = 1.0077e-01, PNorm = 85.6085, GNorm = 0.6776, lr_0 = 1.4697e-04
Loss = 8.5725e-02, PNorm = 85.6117, GNorm = 0.5624, lr_0 = 1.4687e-04
Loss = 8.7722e-02, PNorm = 85.6145, GNorm = 0.5850, lr_0 = 1.4677e-04
Loss = 8.0221e-02, PNorm = 85.6169, GNorm = 0.5596, lr_0 = 1.4667e-04
Loss = 7.4134e-02, PNorm = 85.6181, GNorm = 0.8551, lr_0 = 1.4657e-04
Loss = 7.3952e-02, PNorm = 85.6196, GNorm = 0.7051, lr_0 = 1.4647e-04
Loss = 8.0668e-02, PNorm = 85.6230, GNorm = 0.7701, lr_0 = 1.4637e-04
Loss = 8.6556e-02, PNorm = 85.6256, GNorm = 0.4290, lr_0 = 1.4627e-04
Loss = 7.7823e-02, PNorm = 85.6302, GNorm = 0.5029, lr_0 = 1.4617e-04
Loss = 8.2308e-02, PNorm = 85.6322, GNorm = 0.6282, lr_0 = 1.4607e-04
Loss = 7.3898e-02, PNorm = 85.6346, GNorm = 0.5724, lr_0 = 1.4597e-04
Loss = 8.3642e-02, PNorm = 85.6378, GNorm = 0.6583, lr_0 = 1.4587e-04
Loss = 8.1877e-02, PNorm = 85.6371, GNorm = 0.6770, lr_0 = 1.4577e-04
Loss = 7.9133e-02, PNorm = 85.6386, GNorm = 0.7672, lr_0 = 1.4567e-04
Loss = 8.4841e-02, PNorm = 85.6399, GNorm = 0.7174, lr_0 = 1.4557e-04
Loss = 8.6110e-02, PNorm = 85.6422, GNorm = 0.5248, lr_0 = 1.4547e-04
Loss = 7.3919e-02, PNorm = 85.6448, GNorm = 0.5961, lr_0 = 1.4537e-04
Loss = 7.7300e-02, PNorm = 85.6472, GNorm = 0.6108, lr_0 = 1.4527e-04
Loss = 7.5738e-02, PNorm = 85.6507, GNorm = 0.4167, lr_0 = 1.4517e-04
Loss = 9.5674e-02, PNorm = 85.6541, GNorm = 0.6734, lr_0 = 1.4507e-04
Loss = 7.5311e-02, PNorm = 85.6561, GNorm = 0.7154, lr_0 = 1.4497e-04
Loss = 8.0642e-02, PNorm = 85.6588, GNorm = 0.4849, lr_0 = 1.4487e-04
Loss = 8.3239e-02, PNorm = 85.6611, GNorm = 0.5231, lr_0 = 1.4477e-04
Loss = 8.0491e-02, PNorm = 85.6645, GNorm = 0.4809, lr_0 = 1.4467e-04
Loss = 9.0528e-02, PNorm = 85.6677, GNorm = 0.6181, lr_0 = 1.4457e-04
Loss = 8.5531e-02, PNorm = 85.6707, GNorm = 0.6642, lr_0 = 1.4447e-04
Loss = 7.2097e-02, PNorm = 85.6725, GNorm = 0.5607, lr_0 = 1.4438e-04
Loss = 7.8416e-02, PNorm = 85.6765, GNorm = 0.9278, lr_0 = 1.4428e-04
Loss = 8.1420e-02, PNorm = 85.6775, GNorm = 0.6680, lr_0 = 1.4418e-04
Loss = 8.6504e-02, PNorm = 85.6809, GNorm = 0.6425, lr_0 = 1.4408e-04
Loss = 7.4101e-02, PNorm = 85.6856, GNorm = 0.6261, lr_0 = 1.4398e-04
Loss = 7.3844e-02, PNorm = 85.6876, GNorm = 0.7312, lr_0 = 1.4388e-04
Loss = 7.8841e-02, PNorm = 85.6881, GNorm = 0.7032, lr_0 = 1.4378e-04
Loss = 6.7057e-02, PNorm = 85.6900, GNorm = 0.5607, lr_0 = 1.4368e-04
Loss = 9.3461e-02, PNorm = 85.6904, GNorm = 0.8870, lr_0 = 1.4359e-04
Loss = 7.1074e-02, PNorm = 85.6912, GNorm = 0.5512, lr_0 = 1.4349e-04
Loss = 8.4308e-02, PNorm = 85.6963, GNorm = 0.7494, lr_0 = 1.4339e-04
Loss = 8.8321e-02, PNorm = 85.6996, GNorm = 0.8278, lr_0 = 1.4329e-04
Loss = 8.8834e-02, PNorm = 85.7018, GNorm = 0.6294, lr_0 = 1.4319e-04
Loss = 8.5020e-02, PNorm = 85.7060, GNorm = 0.6924, lr_0 = 1.4310e-04
Loss = 1.0666e-01, PNorm = 85.7097, GNorm = 0.8092, lr_0 = 1.4300e-04
Loss = 9.2770e-02, PNorm = 85.7123, GNorm = 0.9181, lr_0 = 1.4290e-04
Loss = 7.6640e-02, PNorm = 85.7142, GNorm = 0.8436, lr_0 = 1.4280e-04
Loss = 6.9258e-02, PNorm = 85.7160, GNorm = 0.4534, lr_0 = 1.4270e-04
Loss = 6.3209e-02, PNorm = 85.7186, GNorm = 0.7312, lr_0 = 1.4261e-04
Loss = 7.7887e-02, PNorm = 85.7203, GNorm = 0.7093, lr_0 = 1.4251e-04
Loss = 8.9874e-02, PNorm = 85.7207, GNorm = 0.9060, lr_0 = 1.4241e-04
Loss = 7.7460e-02, PNorm = 85.7233, GNorm = 0.6879, lr_0 = 1.4231e-04
Loss = 7.4190e-02, PNorm = 85.7265, GNorm = 0.5722, lr_0 = 1.4222e-04
Loss = 8.0744e-02, PNorm = 85.7289, GNorm = 0.5415, lr_0 = 1.4212e-04
Loss = 8.7051e-02, PNorm = 85.7306, GNorm = 0.5605, lr_0 = 1.4202e-04
Loss = 7.2001e-02, PNorm = 85.7318, GNorm = 0.6044, lr_0 = 1.4192e-04
Loss = 7.3630e-02, PNorm = 85.7328, GNorm = 0.6327, lr_0 = 1.4183e-04
Loss = 8.6977e-02, PNorm = 85.7348, GNorm = 0.7764, lr_0 = 1.4173e-04
Loss = 8.1863e-02, PNorm = 85.7380, GNorm = 0.7185, lr_0 = 1.4163e-04
Loss = 8.5212e-02, PNorm = 85.7406, GNorm = 0.6629, lr_0 = 1.4153e-04
Loss = 7.8463e-02, PNorm = 85.7398, GNorm = 0.5283, lr_0 = 1.4144e-04
Loss = 8.4114e-02, PNorm = 85.7400, GNorm = 0.5799, lr_0 = 1.4134e-04
Loss = 8.3007e-02, PNorm = 85.7433, GNorm = 0.6892, lr_0 = 1.4124e-04
Loss = 8.5509e-02, PNorm = 85.7462, GNorm = 0.7529, lr_0 = 1.4115e-04
Loss = 8.4770e-02, PNorm = 85.7479, GNorm = 0.5110, lr_0 = 1.4105e-04
Loss = 7.9615e-02, PNorm = 85.7506, GNorm = 0.7621, lr_0 = 1.4095e-04
Loss = 8.2402e-02, PNorm = 85.7530, GNorm = 0.8757, lr_0 = 1.4086e-04
Loss = 7.8627e-02, PNorm = 85.7553, GNorm = 0.7208, lr_0 = 1.4076e-04
Loss = 7.5428e-02, PNorm = 85.7572, GNorm = 0.5113, lr_0 = 1.4066e-04
Loss = 7.3091e-02, PNorm = 85.7601, GNorm = 0.9447, lr_0 = 1.4057e-04
Loss = 8.1867e-02, PNorm = 85.7616, GNorm = 0.4683, lr_0 = 1.4047e-04
Loss = 8.8180e-02, PNorm = 85.7631, GNorm = 0.8290, lr_0 = 1.4038e-04
Loss = 8.7787e-02, PNorm = 85.7636, GNorm = 0.5550, lr_0 = 1.4028e-04
Loss = 7.4380e-02, PNorm = 85.7641, GNorm = 0.6466, lr_0 = 1.4018e-04
Loss = 8.2519e-02, PNorm = 85.7667, GNorm = 0.7850, lr_0 = 1.4009e-04
Loss = 8.2301e-02, PNorm = 85.7678, GNorm = 0.8039, lr_0 = 1.3999e-04
Loss = 9.0896e-02, PNorm = 85.7692, GNorm = 0.7462, lr_0 = 1.3990e-04
Loss = 7.9242e-02, PNorm = 85.7710, GNorm = 0.6553, lr_0 = 1.3980e-04
Loss = 7.9690e-02, PNorm = 85.7740, GNorm = 0.8224, lr_0 = 1.3970e-04
Loss = 7.7702e-02, PNorm = 85.7763, GNorm = 0.5294, lr_0 = 1.3961e-04
Loss = 9.4989e-02, PNorm = 85.7786, GNorm = 0.6269, lr_0 = 1.3951e-04
Loss = 7.9904e-02, PNorm = 85.7816, GNorm = 0.6338, lr_0 = 1.3942e-04
Loss = 9.2521e-02, PNorm = 85.7835, GNorm = 0.5103, lr_0 = 1.3932e-04
Loss = 7.8650e-02, PNorm = 85.7849, GNorm = 0.7214, lr_0 = 1.3923e-04
Loss = 8.6109e-02, PNorm = 85.7878, GNorm = 0.5842, lr_0 = 1.3913e-04
Loss = 7.2554e-02, PNorm = 85.7914, GNorm = 0.5322, lr_0 = 1.3904e-04
Loss = 8.9558e-02, PNorm = 85.7929, GNorm = 0.5800, lr_0 = 1.3894e-04
Validation mae = 0.225410
Epoch 26
Loss = 7.8283e-02, PNorm = 85.7960, GNorm = 0.5870, lr_0 = 1.3884e-04
Loss = 7.3439e-02, PNorm = 85.7971, GNorm = 0.6960, lr_0 = 1.3875e-04
Loss = 8.8242e-02, PNorm = 85.7976, GNorm = 0.7822, lr_0 = 1.3865e-04
Loss = 7.2427e-02, PNorm = 85.8001, GNorm = 0.6219, lr_0 = 1.3856e-04
Loss = 7.7120e-02, PNorm = 85.8016, GNorm = 0.5410, lr_0 = 1.3846e-04
Loss = 8.3775e-02, PNorm = 85.8030, GNorm = 0.6913, lr_0 = 1.3837e-04
Loss = 7.4672e-02, PNorm = 85.8054, GNorm = 0.6826, lr_0 = 1.3828e-04
Loss = 6.5777e-02, PNorm = 85.8094, GNorm = 0.6592, lr_0 = 1.3818e-04
Loss = 6.8508e-02, PNorm = 85.8122, GNorm = 0.5604, lr_0 = 1.3809e-04
Loss = 6.8730e-02, PNorm = 85.8140, GNorm = 0.5971, lr_0 = 1.3799e-04
Loss = 7.0535e-02, PNorm = 85.8151, GNorm = 0.4745, lr_0 = 1.3790e-04
Loss = 7.4042e-02, PNorm = 85.8175, GNorm = 0.6021, lr_0 = 1.3780e-04
Loss = 8.6667e-02, PNorm = 85.8197, GNorm = 0.4881, lr_0 = 1.3771e-04
Loss = 7.5756e-02, PNorm = 85.8219, GNorm = 0.5915, lr_0 = 1.3761e-04
Loss = 8.7810e-02, PNorm = 85.8258, GNorm = 0.6962, lr_0 = 1.3752e-04
Loss = 7.1923e-02, PNorm = 85.8289, GNorm = 0.5492, lr_0 = 1.3742e-04
Loss = 7.9629e-02, PNorm = 85.8320, GNorm = 0.5629, lr_0 = 1.3733e-04
Loss = 7.6637e-02, PNorm = 85.8345, GNorm = 0.6465, lr_0 = 1.3724e-04
Loss = 8.1570e-02, PNorm = 85.8325, GNorm = 0.6193, lr_0 = 1.3714e-04
Loss = 7.7080e-02, PNorm = 85.8318, GNorm = 0.7735, lr_0 = 1.3705e-04
Loss = 6.9658e-02, PNorm = 85.8346, GNorm = 0.4898, lr_0 = 1.3695e-04
Loss = 7.3948e-02, PNorm = 85.8377, GNorm = 0.6641, lr_0 = 1.3686e-04
Loss = 7.5647e-02, PNorm = 85.8411, GNorm = 0.7652, lr_0 = 1.3677e-04
Loss = 7.7046e-02, PNorm = 85.8435, GNorm = 0.5853, lr_0 = 1.3667e-04
Loss = 7.7921e-02, PNorm = 85.8458, GNorm = 0.5767, lr_0 = 1.3658e-04
Loss = 8.1150e-02, PNorm = 85.8478, GNorm = 0.7017, lr_0 = 1.3649e-04
Loss = 8.4652e-02, PNorm = 85.8484, GNorm = 0.5351, lr_0 = 1.3639e-04
Loss = 9.4151e-02, PNorm = 85.8496, GNorm = 0.8063, lr_0 = 1.3630e-04
Loss = 7.1934e-02, PNorm = 85.8516, GNorm = 0.5451, lr_0 = 1.3621e-04
Loss = 7.7304e-02, PNorm = 85.8560, GNorm = 0.6218, lr_0 = 1.3611e-04
Loss = 8.9924e-02, PNorm = 85.8588, GNorm = 0.6623, lr_0 = 1.3602e-04
Loss = 9.4477e-02, PNorm = 85.8612, GNorm = 0.8658, lr_0 = 1.3593e-04
Loss = 8.5299e-02, PNorm = 85.8643, GNorm = 0.6659, lr_0 = 1.3583e-04
Loss = 6.5300e-02, PNorm = 85.8664, GNorm = 0.6332, lr_0 = 1.3574e-04
Loss = 8.5913e-02, PNorm = 85.8689, GNorm = 0.6843, lr_0 = 1.3565e-04
Loss = 6.6377e-02, PNorm = 85.8701, GNorm = 0.4357, lr_0 = 1.3555e-04
Loss = 8.8624e-02, PNorm = 85.8715, GNorm = 0.6713, lr_0 = 1.3546e-04
Loss = 7.3058e-02, PNorm = 85.8733, GNorm = 0.6001, lr_0 = 1.3537e-04
Loss = 7.6516e-02, PNorm = 85.8753, GNorm = 0.5161, lr_0 = 1.3528e-04
Loss = 8.6591e-02, PNorm = 85.8767, GNorm = 0.5312, lr_0 = 1.3518e-04
Loss = 7.8331e-02, PNorm = 85.8770, GNorm = 0.5561, lr_0 = 1.3509e-04
Loss = 8.5496e-02, PNorm = 85.8786, GNorm = 0.6987, lr_0 = 1.3500e-04
Loss = 8.0916e-02, PNorm = 85.8810, GNorm = 0.5942, lr_0 = 1.3491e-04
Loss = 8.7675e-02, PNorm = 85.8821, GNorm = 0.7090, lr_0 = 1.3481e-04
Loss = 7.7023e-02, PNorm = 85.8852, GNorm = 0.6130, lr_0 = 1.3472e-04
Loss = 7.8455e-02, PNorm = 85.8876, GNorm = 0.8824, lr_0 = 1.3463e-04
Loss = 7.8649e-02, PNorm = 85.8880, GNorm = 0.5844, lr_0 = 1.3454e-04
Loss = 7.9015e-02, PNorm = 85.8887, GNorm = 0.6604, lr_0 = 1.3444e-04
Loss = 7.9025e-02, PNorm = 85.8896, GNorm = 0.5449, lr_0 = 1.3435e-04
Loss = 6.8971e-02, PNorm = 85.8923, GNorm = 0.7679, lr_0 = 1.3426e-04
Loss = 8.7229e-02, PNorm = 85.8966, GNorm = 0.7294, lr_0 = 1.3417e-04
Loss = 7.9511e-02, PNorm = 85.8986, GNorm = 0.6561, lr_0 = 1.3408e-04
Loss = 7.3160e-02, PNorm = 85.8996, GNorm = 0.6648, lr_0 = 1.3398e-04
Loss = 9.5662e-02, PNorm = 85.9000, GNorm = 0.4977, lr_0 = 1.3389e-04
Loss = 7.5000e-02, PNorm = 85.9015, GNorm = 0.5850, lr_0 = 1.3380e-04
Loss = 7.4132e-02, PNorm = 85.9069, GNorm = 0.6840, lr_0 = 1.3371e-04
Loss = 7.1941e-02, PNorm = 85.9079, GNorm = 0.4800, lr_0 = 1.3362e-04
Loss = 8.0380e-02, PNorm = 85.9084, GNorm = 0.7099, lr_0 = 1.3353e-04
Loss = 8.7381e-02, PNorm = 85.9102, GNorm = 0.7414, lr_0 = 1.3343e-04
Loss = 8.3480e-02, PNorm = 85.9103, GNorm = 0.5934, lr_0 = 1.3334e-04
Loss = 7.8768e-02, PNorm = 85.9121, GNorm = 0.6571, lr_0 = 1.3325e-04
Loss = 7.5176e-02, PNorm = 85.9145, GNorm = 0.6540, lr_0 = 1.3316e-04
Loss = 6.6057e-02, PNorm = 85.9162, GNorm = 0.5985, lr_0 = 1.3307e-04
Loss = 7.3765e-02, PNorm = 85.9183, GNorm = 0.4588, lr_0 = 1.3298e-04
Loss = 9.1479e-02, PNorm = 85.9205, GNorm = 0.5866, lr_0 = 1.3289e-04
Loss = 8.9824e-02, PNorm = 85.9233, GNorm = 0.7285, lr_0 = 1.3280e-04
Loss = 8.6897e-02, PNorm = 85.9267, GNorm = 1.1394, lr_0 = 1.3270e-04
Loss = 7.9226e-02, PNorm = 85.9293, GNorm = 0.6625, lr_0 = 1.3261e-04
Loss = 9.8474e-02, PNorm = 85.9307, GNorm = 0.6484, lr_0 = 1.3252e-04
Loss = 8.1823e-02, PNorm = 85.9328, GNorm = 0.6288, lr_0 = 1.3243e-04
Loss = 7.3297e-02, PNorm = 85.9354, GNorm = 0.7128, lr_0 = 1.3234e-04
Loss = 8.0851e-02, PNorm = 85.9380, GNorm = 0.5661, lr_0 = 1.3225e-04
Loss = 7.5209e-02, PNorm = 85.9410, GNorm = 0.6863, lr_0 = 1.3216e-04
Loss = 8.1137e-02, PNorm = 85.9438, GNorm = 0.6366, lr_0 = 1.3207e-04
Loss = 7.2772e-02, PNorm = 85.9442, GNorm = 0.6459, lr_0 = 1.3198e-04
Loss = 7.5018e-02, PNorm = 85.9446, GNorm = 0.4767, lr_0 = 1.3189e-04
Loss = 7.2520e-02, PNorm = 85.9474, GNorm = 0.5843, lr_0 = 1.3180e-04
Loss = 8.2832e-02, PNorm = 85.9493, GNorm = 0.7326, lr_0 = 1.3171e-04
Loss = 8.2179e-02, PNorm = 85.9506, GNorm = 0.7237, lr_0 = 1.3162e-04
Loss = 7.3573e-02, PNorm = 85.9538, GNorm = 0.6252, lr_0 = 1.3153e-04
Loss = 7.7006e-02, PNorm = 85.9571, GNorm = 0.4127, lr_0 = 1.3144e-04
Loss = 8.0164e-02, PNorm = 85.9598, GNorm = 0.6674, lr_0 = 1.3135e-04
Loss = 7.3970e-02, PNorm = 85.9602, GNorm = 0.7339, lr_0 = 1.3126e-04
Loss = 8.4586e-02, PNorm = 85.9616, GNorm = 0.7940, lr_0 = 1.3117e-04
Loss = 7.8792e-02, PNorm = 85.9628, GNorm = 0.6608, lr_0 = 1.3108e-04
Loss = 8.7390e-02, PNorm = 85.9657, GNorm = 0.7769, lr_0 = 1.3099e-04
Loss = 1.0057e-01, PNorm = 85.9678, GNorm = 0.5872, lr_0 = 1.3090e-04
Loss = 6.6785e-02, PNorm = 85.9701, GNorm = 0.4173, lr_0 = 1.3081e-04
Loss = 7.4869e-02, PNorm = 85.9719, GNorm = 0.7775, lr_0 = 1.3072e-04
Loss = 7.7578e-02, PNorm = 85.9741, GNorm = 0.5220, lr_0 = 1.3063e-04
Loss = 7.4720e-02, PNorm = 85.9754, GNorm = 0.6529, lr_0 = 1.3054e-04
Loss = 7.9536e-02, PNorm = 85.9763, GNorm = 0.5640, lr_0 = 1.3045e-04
Loss = 7.7012e-02, PNorm = 85.9785, GNorm = 0.5381, lr_0 = 1.3036e-04
Loss = 7.7347e-02, PNorm = 85.9820, GNorm = 0.7212, lr_0 = 1.3027e-04
Loss = 8.0990e-02, PNorm = 85.9848, GNorm = 0.5634, lr_0 = 1.3018e-04
Loss = 6.4560e-02, PNorm = 85.9864, GNorm = 0.5522, lr_0 = 1.3009e-04
Loss = 8.1182e-02, PNorm = 85.9870, GNorm = 0.7591, lr_0 = 1.3000e-04
Loss = 8.2155e-02, PNorm = 85.9885, GNorm = 0.4715, lr_0 = 1.2992e-04
Loss = 8.9704e-02, PNorm = 85.9906, GNorm = 0.6359, lr_0 = 1.2983e-04
Loss = 7.4051e-02, PNorm = 85.9955, GNorm = 0.5940, lr_0 = 1.2974e-04
Loss = 7.1057e-02, PNorm = 85.9968, GNorm = 0.5284, lr_0 = 1.2965e-04
Loss = 8.5861e-02, PNorm = 85.9976, GNorm = 0.6909, lr_0 = 1.2956e-04
Loss = 7.7002e-02, PNorm = 86.0000, GNorm = 0.6891, lr_0 = 1.2947e-04
Loss = 7.0657e-02, PNorm = 86.0048, GNorm = 0.4892, lr_0 = 1.2938e-04
Loss = 7.1440e-02, PNorm = 86.0077, GNorm = 0.7196, lr_0 = 1.2929e-04
Loss = 7.8982e-02, PNorm = 86.0079, GNorm = 0.6276, lr_0 = 1.2921e-04
Loss = 8.9763e-02, PNorm = 86.0092, GNorm = 0.6599, lr_0 = 1.2912e-04
Loss = 7.9942e-02, PNorm = 86.0123, GNorm = 0.8628, lr_0 = 1.2903e-04
Loss = 7.9019e-02, PNorm = 86.0145, GNorm = 0.8588, lr_0 = 1.2894e-04
Loss = 7.6477e-02, PNorm = 86.0167, GNorm = 0.6733, lr_0 = 1.2885e-04
Loss = 8.4068e-02, PNorm = 86.0205, GNorm = 0.7545, lr_0 = 1.2876e-04
Loss = 8.0909e-02, PNorm = 86.0234, GNorm = 0.7562, lr_0 = 1.2867e-04
Loss = 7.5150e-02, PNorm = 86.0264, GNorm = 0.6079, lr_0 = 1.2859e-04
Loss = 8.1064e-02, PNorm = 86.0265, GNorm = 0.6337, lr_0 = 1.2850e-04
Loss = 8.0034e-02, PNorm = 86.0280, GNorm = 0.6223, lr_0 = 1.2841e-04
Loss = 8.1707e-02, PNorm = 86.0303, GNorm = 0.5917, lr_0 = 1.2832e-04
Loss = 7.4446e-02, PNorm = 86.0320, GNorm = 0.4919, lr_0 = 1.2823e-04
Loss = 8.4894e-02, PNorm = 86.0327, GNorm = 0.7434, lr_0 = 1.2815e-04
Loss = 8.6642e-02, PNorm = 86.0341, GNorm = 0.9796, lr_0 = 1.2806e-04
Loss = 8.7741e-02, PNorm = 86.0350, GNorm = 0.6088, lr_0 = 1.2797e-04
Validation mae = 0.226670
Epoch 27
Loss = 6.8499e-02, PNorm = 86.0358, GNorm = 0.4360, lr_0 = 1.2788e-04
Loss = 7.4903e-02, PNorm = 86.0375, GNorm = 0.6065, lr_0 = 1.2780e-04
Loss = 7.0090e-02, PNorm = 86.0395, GNorm = 0.4176, lr_0 = 1.2771e-04
Loss = 7.0719e-02, PNorm = 86.0408, GNorm = 0.5546, lr_0 = 1.2762e-04
Loss = 6.9192e-02, PNorm = 86.0442, GNorm = 0.5136, lr_0 = 1.2753e-04
Loss = 7.3573e-02, PNorm = 86.0472, GNorm = 0.6402, lr_0 = 1.2745e-04
Loss = 6.4757e-02, PNorm = 86.0495, GNorm = 0.5803, lr_0 = 1.2736e-04
Loss = 6.9825e-02, PNorm = 86.0506, GNorm = 0.5794, lr_0 = 1.2727e-04
Loss = 7.5788e-02, PNorm = 86.0529, GNorm = 0.6612, lr_0 = 1.2718e-04
Loss = 8.0464e-02, PNorm = 86.0556, GNorm = 0.5871, lr_0 = 1.2710e-04
Loss = 7.3559e-02, PNorm = 86.0586, GNorm = 0.7174, lr_0 = 1.2701e-04
Loss = 7.7729e-02, PNorm = 86.0598, GNorm = 0.6635, lr_0 = 1.2692e-04
Loss = 8.4137e-02, PNorm = 86.0603, GNorm = 0.5618, lr_0 = 1.2684e-04
Loss = 6.8803e-02, PNorm = 86.0602, GNorm = 0.6188, lr_0 = 1.2675e-04
Loss = 6.0928e-02, PNorm = 86.0610, GNorm = 0.5967, lr_0 = 1.2666e-04
Loss = 6.6162e-02, PNorm = 86.0612, GNorm = 0.4572, lr_0 = 1.2658e-04
Loss = 8.1053e-02, PNorm = 86.0632, GNorm = 0.6019, lr_0 = 1.2649e-04
Loss = 7.7534e-02, PNorm = 86.0680, GNorm = 0.5374, lr_0 = 1.2640e-04
Loss = 8.0866e-02, PNorm = 86.0710, GNorm = 0.6219, lr_0 = 1.2632e-04
Loss = 7.0784e-02, PNorm = 86.0720, GNorm = 0.5198, lr_0 = 1.2623e-04
Loss = 7.6513e-02, PNorm = 86.0739, GNorm = 0.6234, lr_0 = 1.2614e-04
Loss = 8.3470e-02, PNorm = 86.0764, GNorm = 0.5883, lr_0 = 1.2606e-04
Loss = 8.0781e-02, PNorm = 86.0788, GNorm = 0.6037, lr_0 = 1.2597e-04
Loss = 8.4476e-02, PNorm = 86.0821, GNorm = 0.6238, lr_0 = 1.2588e-04
Loss = 7.1879e-02, PNorm = 86.0845, GNorm = 0.5757, lr_0 = 1.2580e-04
Loss = 8.3957e-02, PNorm = 86.0889, GNorm = 0.5958, lr_0 = 1.2571e-04
Loss = 7.3082e-02, PNorm = 86.0914, GNorm = 0.6829, lr_0 = 1.2563e-04
Loss = 7.9991e-02, PNorm = 86.0926, GNorm = 0.4163, lr_0 = 1.2554e-04
Loss = 7.6415e-02, PNorm = 86.0949, GNorm = 0.7551, lr_0 = 1.2545e-04
Loss = 7.0245e-02, PNorm = 86.0962, GNorm = 0.4911, lr_0 = 1.2537e-04
Loss = 7.8223e-02, PNorm = 86.0974, GNorm = 0.6585, lr_0 = 1.2528e-04
Loss = 7.5973e-02, PNorm = 86.0991, GNorm = 0.7386, lr_0 = 1.2520e-04
Loss = 7.7335e-02, PNorm = 86.0984, GNorm = 0.5900, lr_0 = 1.2511e-04
Loss = 6.4094e-02, PNorm = 86.0992, GNorm = 0.5558, lr_0 = 1.2502e-04
Loss = 8.0515e-02, PNorm = 86.1013, GNorm = 0.6404, lr_0 = 1.2494e-04
Loss = 7.3196e-02, PNorm = 86.1031, GNorm = 0.4909, lr_0 = 1.2485e-04
Loss = 7.5658e-02, PNorm = 86.1058, GNorm = 0.7025, lr_0 = 1.2477e-04
Loss = 6.8896e-02, PNorm = 86.1086, GNorm = 0.6633, lr_0 = 1.2468e-04
Loss = 7.3144e-02, PNorm = 86.1103, GNorm = 0.7574, lr_0 = 1.2460e-04
Loss = 7.6738e-02, PNorm = 86.1131, GNorm = 0.5554, lr_0 = 1.2451e-04
Loss = 7.4919e-02, PNorm = 86.1151, GNorm = 0.6670, lr_0 = 1.2443e-04
Loss = 7.2188e-02, PNorm = 86.1162, GNorm = 0.5572, lr_0 = 1.2434e-04
Loss = 7.9355e-02, PNorm = 86.1200, GNorm = 0.6664, lr_0 = 1.2426e-04
Loss = 7.8021e-02, PNorm = 86.1229, GNorm = 0.5848, lr_0 = 1.2417e-04
Loss = 8.0246e-02, PNorm = 86.1249, GNorm = 0.5955, lr_0 = 1.2409e-04
Loss = 7.5464e-02, PNorm = 86.1279, GNorm = 0.5527, lr_0 = 1.2400e-04
Loss = 7.5817e-02, PNorm = 86.1300, GNorm = 0.5644, lr_0 = 1.2392e-04
Loss = 8.1367e-02, PNorm = 86.1311, GNorm = 0.6603, lr_0 = 1.2383e-04
Loss = 7.5941e-02, PNorm = 86.1329, GNorm = 0.5919, lr_0 = 1.2375e-04
Loss = 7.9521e-02, PNorm = 86.1344, GNorm = 0.7871, lr_0 = 1.2366e-04
Loss = 6.7611e-02, PNorm = 86.1360, GNorm = 0.7162, lr_0 = 1.2358e-04
Loss = 7.8171e-02, PNorm = 86.1366, GNorm = 0.5170, lr_0 = 1.2349e-04
Loss = 8.1456e-02, PNorm = 86.1399, GNorm = 0.5403, lr_0 = 1.2341e-04
Loss = 8.3377e-02, PNorm = 86.1420, GNorm = 0.6493, lr_0 = 1.2332e-04
Loss = 7.8281e-02, PNorm = 86.1442, GNorm = 0.6441, lr_0 = 1.2324e-04
Loss = 8.0663e-02, PNorm = 86.1473, GNorm = 0.5614, lr_0 = 1.2315e-04
Loss = 8.4349e-02, PNorm = 86.1497, GNorm = 0.6532, lr_0 = 1.2307e-04
Loss = 7.9447e-02, PNorm = 86.1521, GNorm = 0.6406, lr_0 = 1.2298e-04
Loss = 7.6881e-02, PNorm = 86.1535, GNorm = 0.5715, lr_0 = 1.2290e-04
Loss = 8.5808e-02, PNorm = 86.1561, GNorm = 0.7751, lr_0 = 1.2282e-04
Loss = 7.6117e-02, PNorm = 86.1592, GNorm = 0.7660, lr_0 = 1.2273e-04
Loss = 6.9787e-02, PNorm = 86.1608, GNorm = 0.5168, lr_0 = 1.2265e-04
Loss = 6.8618e-02, PNorm = 86.1624, GNorm = 0.5196, lr_0 = 1.2256e-04
Loss = 7.9296e-02, PNorm = 86.1636, GNorm = 0.6073, lr_0 = 1.2248e-04
Loss = 7.7116e-02, PNorm = 86.1645, GNorm = 0.8263, lr_0 = 1.2240e-04
Loss = 9.0040e-02, PNorm = 86.1669, GNorm = 0.5911, lr_0 = 1.2231e-04
Loss = 8.1302e-02, PNorm = 86.1696, GNorm = 0.5709, lr_0 = 1.2223e-04
Loss = 7.6963e-02, PNorm = 86.1708, GNorm = 0.6156, lr_0 = 1.2214e-04
Loss = 8.2615e-02, PNorm = 86.1716, GNorm = 0.7563, lr_0 = 1.2206e-04
Loss = 6.9893e-02, PNorm = 86.1744, GNorm = 0.5824, lr_0 = 1.2198e-04
Loss = 8.6087e-02, PNorm = 86.1777, GNorm = 0.6132, lr_0 = 1.2189e-04
Loss = 7.8833e-02, PNorm = 86.1787, GNorm = 0.6464, lr_0 = 1.2181e-04
Loss = 7.5635e-02, PNorm = 86.1808, GNorm = 0.8219, lr_0 = 1.2173e-04
Loss = 7.6492e-02, PNorm = 86.1842, GNorm = 0.7457, lr_0 = 1.2164e-04
Loss = 8.6408e-02, PNorm = 86.1875, GNorm = 0.6385, lr_0 = 1.2156e-04
Loss = 7.9265e-02, PNorm = 86.1883, GNorm = 0.5753, lr_0 = 1.2148e-04
Loss = 7.4838e-02, PNorm = 86.1890, GNorm = 0.6215, lr_0 = 1.2139e-04
Loss = 7.0155e-02, PNorm = 86.1892, GNorm = 0.6329, lr_0 = 1.2131e-04
Loss = 7.4923e-02, PNorm = 86.1895, GNorm = 0.4601, lr_0 = 1.2123e-04
Loss = 7.6188e-02, PNorm = 86.1893, GNorm = 0.7163, lr_0 = 1.2114e-04
Loss = 7.7667e-02, PNorm = 86.1913, GNorm = 0.6272, lr_0 = 1.2106e-04
Loss = 7.6119e-02, PNorm = 86.1929, GNorm = 0.5544, lr_0 = 1.2098e-04
Loss = 7.8215e-02, PNorm = 86.1942, GNorm = 0.6592, lr_0 = 1.2090e-04
Loss = 7.8379e-02, PNorm = 86.1957, GNorm = 0.6856, lr_0 = 1.2081e-04
Loss = 7.4484e-02, PNorm = 86.1960, GNorm = 0.5534, lr_0 = 1.2073e-04
Loss = 7.3896e-02, PNorm = 86.1981, GNorm = 0.9787, lr_0 = 1.2065e-04
Loss = 7.9585e-02, PNorm = 86.2005, GNorm = 0.5918, lr_0 = 1.2056e-04
Loss = 7.8400e-02, PNorm = 86.2003, GNorm = 0.5470, lr_0 = 1.2048e-04
Loss = 7.4148e-02, PNorm = 86.2010, GNorm = 0.7522, lr_0 = 1.2040e-04
Loss = 7.6729e-02, PNorm = 86.2030, GNorm = 0.5925, lr_0 = 1.2032e-04
Loss = 8.8028e-02, PNorm = 86.2060, GNorm = 0.6450, lr_0 = 1.2023e-04
Loss = 6.8390e-02, PNorm = 86.2074, GNorm = 0.4706, lr_0 = 1.2015e-04
Loss = 7.3500e-02, PNorm = 86.2081, GNorm = 0.5168, lr_0 = 1.2007e-04
Loss = 7.7613e-02, PNorm = 86.2130, GNorm = 0.4358, lr_0 = 1.1999e-04
Loss = 8.2545e-02, PNorm = 86.2157, GNorm = 0.6835, lr_0 = 1.1991e-04
Loss = 8.7117e-02, PNorm = 86.2182, GNorm = 0.5641, lr_0 = 1.1982e-04
Loss = 7.0535e-02, PNorm = 86.2201, GNorm = 0.6897, lr_0 = 1.1974e-04
Loss = 8.1130e-02, PNorm = 86.2198, GNorm = 0.8901, lr_0 = 1.1966e-04
Loss = 8.3701e-02, PNorm = 86.2221, GNorm = 0.8667, lr_0 = 1.1958e-04
Loss = 8.3392e-02, PNorm = 86.2221, GNorm = 0.8280, lr_0 = 1.1950e-04
Loss = 7.7586e-02, PNorm = 86.2221, GNorm = 0.7137, lr_0 = 1.1941e-04
Loss = 9.5239e-02, PNorm = 86.2248, GNorm = 0.9814, lr_0 = 1.1933e-04
Loss = 9.3441e-02, PNorm = 86.2277, GNorm = 0.5842, lr_0 = 1.1925e-04
Loss = 8.0352e-02, PNorm = 86.2306, GNorm = 0.4792, lr_0 = 1.1917e-04
Loss = 7.9475e-02, PNorm = 86.2329, GNorm = 0.8929, lr_0 = 1.1909e-04
Loss = 7.1633e-02, PNorm = 86.2351, GNorm = 0.6685, lr_0 = 1.1901e-04
Loss = 8.4441e-02, PNorm = 86.2359, GNorm = 0.5999, lr_0 = 1.1892e-04
Loss = 8.2300e-02, PNorm = 86.2374, GNorm = 0.5778, lr_0 = 1.1884e-04
Loss = 7.7294e-02, PNorm = 86.2400, GNorm = 0.7352, lr_0 = 1.1876e-04
Loss = 7.4147e-02, PNorm = 86.2425, GNorm = 0.6181, lr_0 = 1.1868e-04
Loss = 7.7892e-02, PNorm = 86.2437, GNorm = 0.7636, lr_0 = 1.1860e-04
Loss = 8.0446e-02, PNorm = 86.2460, GNorm = 0.6157, lr_0 = 1.1852e-04
Loss = 7.9054e-02, PNorm = 86.2486, GNorm = 0.8117, lr_0 = 1.1844e-04
Loss = 7.5246e-02, PNorm = 86.2497, GNorm = 0.6109, lr_0 = 1.1835e-04
Loss = 7.8601e-02, PNorm = 86.2487, GNorm = 0.9056, lr_0 = 1.1827e-04
Loss = 8.5358e-02, PNorm = 86.2501, GNorm = 0.6746, lr_0 = 1.1819e-04
Loss = 7.9268e-02, PNorm = 86.2521, GNorm = 0.7063, lr_0 = 1.1811e-04
Loss = 7.2448e-02, PNorm = 86.2531, GNorm = 0.5230, lr_0 = 1.1803e-04
Loss = 8.0563e-02, PNorm = 86.2564, GNorm = 0.4982, lr_0 = 1.1795e-04
Loss = 7.0120e-02, PNorm = 86.2596, GNorm = 0.6073, lr_0 = 1.1787e-04
Validation mae = 0.227132
Epoch 28
Loss = 7.6921e-02, PNorm = 86.2624, GNorm = 0.6594, lr_0 = 1.1779e-04
Loss = 6.8120e-02, PNorm = 86.2628, GNorm = 0.5732, lr_0 = 1.1771e-04
Loss = 7.7715e-02, PNorm = 86.2641, GNorm = 0.8706, lr_0 = 1.1763e-04
Loss = 6.7286e-02, PNorm = 86.2655, GNorm = 0.4655, lr_0 = 1.1755e-04
Loss = 6.7549e-02, PNorm = 86.2674, GNorm = 0.6985, lr_0 = 1.1747e-04
Loss = 7.7191e-02, PNorm = 86.2690, GNorm = 0.5290, lr_0 = 1.1739e-04
Loss = 7.8662e-02, PNorm = 86.2715, GNorm = 0.5457, lr_0 = 1.1730e-04
Loss = 6.9270e-02, PNorm = 86.2731, GNorm = 0.7218, lr_0 = 1.1722e-04
Loss = 6.8941e-02, PNorm = 86.2739, GNorm = 0.6653, lr_0 = 1.1714e-04
Loss = 8.5279e-02, PNorm = 86.2755, GNorm = 0.6115, lr_0 = 1.1706e-04
Loss = 6.8450e-02, PNorm = 86.2787, GNorm = 0.5273, lr_0 = 1.1698e-04
Loss = 8.1916e-02, PNorm = 86.2807, GNorm = 0.7966, lr_0 = 1.1690e-04
Loss = 7.2150e-02, PNorm = 86.2825, GNorm = 0.6791, lr_0 = 1.1682e-04
Loss = 7.1030e-02, PNorm = 86.2842, GNorm = 0.7301, lr_0 = 1.1674e-04
Loss = 7.6181e-02, PNorm = 86.2857, GNorm = 1.1180, lr_0 = 1.1666e-04
Loss = 7.8713e-02, PNorm = 86.2858, GNorm = 0.6285, lr_0 = 1.1658e-04
Loss = 7.5885e-02, PNorm = 86.2871, GNorm = 0.6432, lr_0 = 1.1650e-04
Loss = 8.3362e-02, PNorm = 86.2907, GNorm = 0.5875, lr_0 = 1.1642e-04
Loss = 7.7172e-02, PNorm = 86.2926, GNorm = 0.6963, lr_0 = 1.1634e-04
Loss = 8.3762e-02, PNorm = 86.2933, GNorm = 0.5472, lr_0 = 1.1626e-04
Loss = 7.8955e-02, PNorm = 86.2935, GNorm = 0.5761, lr_0 = 1.1618e-04
Loss = 7.1065e-02, PNorm = 86.2946, GNorm = 0.5191, lr_0 = 1.1611e-04
Loss = 8.8092e-02, PNorm = 86.2955, GNorm = 0.7970, lr_0 = 1.1603e-04
Loss = 8.8665e-02, PNorm = 86.2965, GNorm = 0.9108, lr_0 = 1.1595e-04
Loss = 7.1063e-02, PNorm = 86.2998, GNorm = 0.6116, lr_0 = 1.1587e-04
Loss = 8.5398e-02, PNorm = 86.3022, GNorm = 0.7620, lr_0 = 1.1579e-04
Loss = 7.4416e-02, PNorm = 86.3022, GNorm = 0.5413, lr_0 = 1.1571e-04
Loss = 7.5758e-02, PNorm = 86.3045, GNorm = 0.5732, lr_0 = 1.1563e-04
Loss = 7.3717e-02, PNorm = 86.3074, GNorm = 0.7074, lr_0 = 1.1555e-04
Loss = 6.7878e-02, PNorm = 86.3082, GNorm = 0.7429, lr_0 = 1.1547e-04
Loss = 7.1667e-02, PNorm = 86.3088, GNorm = 0.4695, lr_0 = 1.1539e-04
Loss = 6.7570e-02, PNorm = 86.3105, GNorm = 0.5379, lr_0 = 1.1531e-04
Loss = 7.2534e-02, PNorm = 86.3122, GNorm = 0.4870, lr_0 = 1.1523e-04
Loss = 7.8064e-02, PNorm = 86.3126, GNorm = 0.6564, lr_0 = 1.1515e-04
Loss = 7.8197e-02, PNorm = 86.3133, GNorm = 0.6241, lr_0 = 1.1508e-04
Loss = 7.8157e-02, PNorm = 86.3146, GNorm = 0.7064, lr_0 = 1.1500e-04
Loss = 7.8661e-02, PNorm = 86.3169, GNorm = 0.6684, lr_0 = 1.1492e-04
Loss = 6.7592e-02, PNorm = 86.3179, GNorm = 0.5320, lr_0 = 1.1484e-04
Loss = 7.7236e-02, PNorm = 86.3193, GNorm = 0.6231, lr_0 = 1.1476e-04
Loss = 7.0170e-02, PNorm = 86.3210, GNorm = 0.5102, lr_0 = 1.1468e-04
Loss = 7.2110e-02, PNorm = 86.3239, GNorm = 0.5619, lr_0 = 1.1460e-04
Loss = 7.2610e-02, PNorm = 86.3262, GNorm = 0.5210, lr_0 = 1.1452e-04
Loss = 7.8490e-02, PNorm = 86.3288, GNorm = 0.5864, lr_0 = 1.1445e-04
Loss = 7.8372e-02, PNorm = 86.3295, GNorm = 0.7861, lr_0 = 1.1437e-04
Loss = 7.3244e-02, PNorm = 86.3315, GNorm = 1.0360, lr_0 = 1.1429e-04
Loss = 7.9881e-02, PNorm = 86.3321, GNorm = 0.6014, lr_0 = 1.1421e-04
Loss = 7.2867e-02, PNorm = 86.3319, GNorm = 0.5043, lr_0 = 1.1413e-04
Loss = 7.4727e-02, PNorm = 86.3325, GNorm = 0.7480, lr_0 = 1.1405e-04
Loss = 6.3375e-02, PNorm = 86.3335, GNorm = 0.4529, lr_0 = 1.1398e-04
Loss = 7.4894e-02, PNorm = 86.3359, GNorm = 0.5638, lr_0 = 1.1390e-04
Loss = 7.9534e-02, PNorm = 86.3380, GNorm = 0.6135, lr_0 = 1.1382e-04
Loss = 7.1704e-02, PNorm = 86.3412, GNorm = 1.0464, lr_0 = 1.1374e-04
Loss = 7.3934e-02, PNorm = 86.3444, GNorm = 0.6152, lr_0 = 1.1366e-04
Loss = 7.1856e-02, PNorm = 86.3475, GNorm = 0.7778, lr_0 = 1.1359e-04
Loss = 8.0863e-02, PNorm = 86.3475, GNorm = 0.7367, lr_0 = 1.1351e-04
Loss = 8.1673e-02, PNorm = 86.3476, GNorm = 0.6396, lr_0 = 1.1343e-04
Loss = 7.6707e-02, PNorm = 86.3493, GNorm = 0.7527, lr_0 = 1.1335e-04
Loss = 6.7625e-02, PNorm = 86.3518, GNorm = 0.6409, lr_0 = 1.1328e-04
Loss = 7.7581e-02, PNorm = 86.3515, GNorm = 0.5687, lr_0 = 1.1320e-04
Loss = 6.9325e-02, PNorm = 86.3521, GNorm = 0.4623, lr_0 = 1.1312e-04
Loss = 7.1019e-02, PNorm = 86.3534, GNorm = 0.6811, lr_0 = 1.1304e-04
Loss = 7.3723e-02, PNorm = 86.3544, GNorm = 0.7072, lr_0 = 1.1297e-04
Loss = 7.7180e-02, PNorm = 86.3553, GNorm = 0.6318, lr_0 = 1.1289e-04
Loss = 7.4488e-02, PNorm = 86.3558, GNorm = 0.8581, lr_0 = 1.1281e-04
Loss = 7.3728e-02, PNorm = 86.3575, GNorm = 0.7248, lr_0 = 1.1273e-04
Loss = 7.5930e-02, PNorm = 86.3604, GNorm = 0.6704, lr_0 = 1.1266e-04
Loss = 8.2347e-02, PNorm = 86.3617, GNorm = 0.8148, lr_0 = 1.1258e-04
Loss = 6.7236e-02, PNorm = 86.3633, GNorm = 0.6920, lr_0 = 1.1250e-04
Loss = 8.2807e-02, PNorm = 86.3637, GNorm = 0.7615, lr_0 = 1.1243e-04
Loss = 6.8331e-02, PNorm = 86.3654, GNorm = 0.6716, lr_0 = 1.1235e-04
Loss = 8.4297e-02, PNorm = 86.3659, GNorm = 0.8407, lr_0 = 1.1227e-04
Loss = 6.6073e-02, PNorm = 86.3661, GNorm = 0.5188, lr_0 = 1.1219e-04
Loss = 7.8340e-02, PNorm = 86.3663, GNorm = 0.7309, lr_0 = 1.1212e-04
Loss = 9.7157e-02, PNorm = 86.3702, GNorm = 0.6237, lr_0 = 1.1204e-04
Loss = 8.0643e-02, PNorm = 86.3738, GNorm = 0.7039, lr_0 = 1.1196e-04
Loss = 7.5342e-02, PNorm = 86.3770, GNorm = 0.7004, lr_0 = 1.1189e-04
Loss = 8.2538e-02, PNorm = 86.3776, GNorm = 0.8160, lr_0 = 1.1181e-04
Loss = 7.1061e-02, PNorm = 86.3783, GNorm = 0.7738, lr_0 = 1.1173e-04
Loss = 7.7094e-02, PNorm = 86.3807, GNorm = 0.7166, lr_0 = 1.1166e-04
Loss = 7.7313e-02, PNorm = 86.3812, GNorm = 0.5658, lr_0 = 1.1158e-04
Loss = 7.0774e-02, PNorm = 86.3827, GNorm = 0.6494, lr_0 = 1.1150e-04
Loss = 7.0316e-02, PNorm = 86.3848, GNorm = 0.6428, lr_0 = 1.1143e-04
Loss = 7.7804e-02, PNorm = 86.3864, GNorm = 0.6656, lr_0 = 1.1135e-04
Loss = 7.5124e-02, PNorm = 86.3882, GNorm = 0.5322, lr_0 = 1.1128e-04
Loss = 8.5925e-02, PNorm = 86.3909, GNorm = 0.6384, lr_0 = 1.1120e-04
Loss = 9.8253e-02, PNorm = 86.3947, GNorm = 0.6011, lr_0 = 1.1112e-04
Loss = 7.5969e-02, PNorm = 86.3981, GNorm = 0.5831, lr_0 = 1.1105e-04
Loss = 8.5878e-02, PNorm = 86.3995, GNorm = 0.5709, lr_0 = 1.1097e-04
Loss = 7.4504e-02, PNorm = 86.4000, GNorm = 0.6542, lr_0 = 1.1089e-04
Loss = 8.0193e-02, PNorm = 86.4015, GNorm = 0.6247, lr_0 = 1.1082e-04
Loss = 7.5049e-02, PNorm = 86.4020, GNorm = 0.5029, lr_0 = 1.1074e-04
Loss = 8.3144e-02, PNorm = 86.4036, GNorm = 0.8702, lr_0 = 1.1067e-04
Loss = 7.7445e-02, PNorm = 86.4053, GNorm = 0.5908, lr_0 = 1.1059e-04
Loss = 7.2985e-02, PNorm = 86.4085, GNorm = 0.8006, lr_0 = 1.1052e-04
Loss = 8.5126e-02, PNorm = 86.4124, GNorm = 0.5722, lr_0 = 1.1044e-04
Loss = 7.4646e-02, PNorm = 86.4133, GNorm = 0.5910, lr_0 = 1.1036e-04
Loss = 7.9541e-02, PNorm = 86.4143, GNorm = 0.5684, lr_0 = 1.1029e-04
Loss = 6.8069e-02, PNorm = 86.4159, GNorm = 0.8319, lr_0 = 1.1021e-04
Loss = 9.0145e-02, PNorm = 86.4171, GNorm = 0.5877, lr_0 = 1.1014e-04
Loss = 7.2274e-02, PNorm = 86.4182, GNorm = 0.5476, lr_0 = 1.1006e-04
Loss = 7.8071e-02, PNorm = 86.4190, GNorm = 0.5938, lr_0 = 1.0999e-04
Loss = 6.8767e-02, PNorm = 86.4209, GNorm = 0.5526, lr_0 = 1.0991e-04
Loss = 7.4175e-02, PNorm = 86.4236, GNorm = 0.7149, lr_0 = 1.0984e-04
Loss = 8.3338e-02, PNorm = 86.4275, GNorm = 0.7578, lr_0 = 1.0976e-04
Loss = 6.7978e-02, PNorm = 86.4291, GNorm = 0.5029, lr_0 = 1.0969e-04
Loss = 7.6170e-02, PNorm = 86.4294, GNorm = 0.5701, lr_0 = 1.0961e-04
Loss = 8.5476e-02, PNorm = 86.4305, GNorm = 0.6231, lr_0 = 1.0954e-04
Loss = 6.9174e-02, PNorm = 86.4320, GNorm = 0.5696, lr_0 = 1.0946e-04
Loss = 8.1159e-02, PNorm = 86.4333, GNorm = 0.7673, lr_0 = 1.0939e-04
Loss = 7.0639e-02, PNorm = 86.4346, GNorm = 0.5397, lr_0 = 1.0931e-04
Loss = 7.5767e-02, PNorm = 86.4362, GNorm = 0.7018, lr_0 = 1.0924e-04
Loss = 8.3129e-02, PNorm = 86.4366, GNorm = 0.5540, lr_0 = 1.0916e-04
Loss = 8.6788e-02, PNorm = 86.4386, GNorm = 0.6432, lr_0 = 1.0909e-04
Loss = 6.7763e-02, PNorm = 86.4399, GNorm = 0.4997, lr_0 = 1.0901e-04
Loss = 8.3541e-02, PNorm = 86.4419, GNorm = 0.5704, lr_0 = 1.0894e-04
Loss = 6.7519e-02, PNorm = 86.4435, GNorm = 0.6013, lr_0 = 1.0886e-04
Loss = 7.4019e-02, PNorm = 86.4446, GNorm = 0.6837, lr_0 = 1.0879e-04
Loss = 6.5086e-02, PNorm = 86.4456, GNorm = 0.5423, lr_0 = 1.0871e-04
Loss = 7.7069e-02, PNorm = 86.4470, GNorm = 0.7503, lr_0 = 1.0864e-04
Loss = 7.7858e-02, PNorm = 86.4492, GNorm = 0.6684, lr_0 = 1.0856e-04
Validation mae = 0.226629
Epoch 29
Loss = 6.6380e-02, PNorm = 86.4511, GNorm = 0.5230, lr_0 = 1.0849e-04
Loss = 6.2866e-02, PNorm = 86.4517, GNorm = 0.6513, lr_0 = 1.0841e-04
Loss = 8.2188e-02, PNorm = 86.4527, GNorm = 0.7530, lr_0 = 1.0834e-04
Loss = 6.5985e-02, PNorm = 86.4536, GNorm = 1.0064, lr_0 = 1.0827e-04
Loss = 7.4767e-02, PNorm = 86.4551, GNorm = 0.5874, lr_0 = 1.0819e-04
Loss = 7.0488e-02, PNorm = 86.4570, GNorm = 0.5258, lr_0 = 1.0812e-04
Loss = 6.8471e-02, PNorm = 86.4601, GNorm = 0.5630, lr_0 = 1.0804e-04
Loss = 7.7514e-02, PNorm = 86.4629, GNorm = 0.6531, lr_0 = 1.0797e-04
Loss = 7.1868e-02, PNorm = 86.4628, GNorm = 0.4768, lr_0 = 1.0790e-04
Loss = 7.8529e-02, PNorm = 86.4634, GNorm = 0.7232, lr_0 = 1.0782e-04
Loss = 7.1851e-02, PNorm = 86.4635, GNorm = 0.5333, lr_0 = 1.0775e-04
Loss = 7.6169e-02, PNorm = 86.4649, GNorm = 0.6730, lr_0 = 1.0767e-04
Loss = 7.4014e-02, PNorm = 86.4670, GNorm = 0.5710, lr_0 = 1.0760e-04
Loss = 8.6641e-02, PNorm = 86.4703, GNorm = 0.4506, lr_0 = 1.0753e-04
Loss = 7.1870e-02, PNorm = 86.4726, GNorm = 0.5245, lr_0 = 1.0745e-04
Loss = 7.1201e-02, PNorm = 86.4736, GNorm = 0.6940, lr_0 = 1.0738e-04
Loss = 7.8969e-02, PNorm = 86.4740, GNorm = 0.9227, lr_0 = 1.0731e-04
Loss = 7.2219e-02, PNorm = 86.4738, GNorm = 0.6549, lr_0 = 1.0723e-04
Loss = 8.0223e-02, PNorm = 86.4752, GNorm = 0.6796, lr_0 = 1.0716e-04
Loss = 7.1492e-02, PNorm = 86.4782, GNorm = 0.6866, lr_0 = 1.0709e-04
Loss = 6.8236e-02, PNorm = 86.4790, GNorm = 0.5238, lr_0 = 1.0701e-04
Loss = 6.7927e-02, PNorm = 86.4792, GNorm = 0.6488, lr_0 = 1.0694e-04
Loss = 6.7067e-02, PNorm = 86.4802, GNorm = 0.5840, lr_0 = 1.0687e-04
Loss = 6.8708e-02, PNorm = 86.4817, GNorm = 0.6378, lr_0 = 1.0679e-04
Loss = 7.7069e-02, PNorm = 86.4842, GNorm = 0.5289, lr_0 = 1.0672e-04
Loss = 9.3811e-02, PNorm = 86.4864, GNorm = 0.5866, lr_0 = 1.0665e-04
Loss = 8.3325e-02, PNorm = 86.4882, GNorm = 0.7491, lr_0 = 1.0657e-04
Loss = 7.6335e-02, PNorm = 86.4909, GNorm = 0.6773, lr_0 = 1.0650e-04
Loss = 7.1500e-02, PNorm = 86.4945, GNorm = 0.7478, lr_0 = 1.0643e-04
Loss = 8.3538e-02, PNorm = 86.4972, GNorm = 0.9028, lr_0 = 1.0635e-04
Loss = 8.0922e-02, PNorm = 86.5002, GNorm = 0.5417, lr_0 = 1.0628e-04
Loss = 7.3397e-02, PNorm = 86.5018, GNorm = 0.6532, lr_0 = 1.0621e-04
Loss = 6.2363e-02, PNorm = 86.5029, GNorm = 0.5615, lr_0 = 1.0614e-04
Loss = 6.9106e-02, PNorm = 86.5043, GNorm = 0.6272, lr_0 = 1.0606e-04
Loss = 7.3818e-02, PNorm = 86.5042, GNorm = 0.5304, lr_0 = 1.0599e-04
Loss = 7.8299e-02, PNorm = 86.5050, GNorm = 0.5036, lr_0 = 1.0592e-04
Loss = 7.3659e-02, PNorm = 86.5070, GNorm = 0.5439, lr_0 = 1.0585e-04
Loss = 7.8179e-02, PNorm = 86.5079, GNorm = 0.6101, lr_0 = 1.0577e-04
Loss = 7.3593e-02, PNorm = 86.5082, GNorm = 0.5546, lr_0 = 1.0570e-04
Loss = 7.0151e-02, PNorm = 86.5096, GNorm = 0.6487, lr_0 = 1.0563e-04
Loss = 6.9894e-02, PNorm = 86.5123, GNorm = 0.6479, lr_0 = 1.0556e-04
Loss = 7.3737e-02, PNorm = 86.5135, GNorm = 0.6723, lr_0 = 1.0548e-04
Loss = 7.4794e-02, PNorm = 86.5137, GNorm = 0.6109, lr_0 = 1.0541e-04
Loss = 6.6127e-02, PNorm = 86.5140, GNorm = 0.6425, lr_0 = 1.0534e-04
Loss = 8.8128e-02, PNorm = 86.5164, GNorm = 0.6566, lr_0 = 1.0527e-04
Loss = 8.3502e-02, PNorm = 86.5178, GNorm = 0.5989, lr_0 = 1.0519e-04
Loss = 8.3491e-02, PNorm = 86.5194, GNorm = 0.8215, lr_0 = 1.0512e-04
Loss = 8.5356e-02, PNorm = 86.5204, GNorm = 0.8602, lr_0 = 1.0505e-04
Loss = 8.1589e-02, PNorm = 86.5226, GNorm = 0.4931, lr_0 = 1.0498e-04
Loss = 7.2108e-02, PNorm = 86.5245, GNorm = 0.6533, lr_0 = 1.0491e-04
Loss = 7.1840e-02, PNorm = 86.5246, GNorm = 0.5504, lr_0 = 1.0483e-04
Loss = 6.8863e-02, PNorm = 86.5256, GNorm = 0.5147, lr_0 = 1.0476e-04
Loss = 7.7217e-02, PNorm = 86.5268, GNorm = 0.7271, lr_0 = 1.0469e-04
Loss = 6.6132e-02, PNorm = 86.5275, GNorm = 0.5441, lr_0 = 1.0462e-04
Loss = 8.1440e-02, PNorm = 86.5297, GNorm = 0.5700, lr_0 = 1.0455e-04
Loss = 8.7266e-02, PNorm = 86.5318, GNorm = 0.6897, lr_0 = 1.0448e-04
Loss = 7.8902e-02, PNorm = 86.5334, GNorm = 0.7024, lr_0 = 1.0440e-04
Loss = 6.8518e-02, PNorm = 86.5338, GNorm = 0.7330, lr_0 = 1.0433e-04
Loss = 6.7886e-02, PNorm = 86.5359, GNorm = 0.5479, lr_0 = 1.0426e-04
Loss = 7.0452e-02, PNorm = 86.5380, GNorm = 0.5417, lr_0 = 1.0419e-04
Loss = 7.2342e-02, PNorm = 86.5386, GNorm = 0.5467, lr_0 = 1.0412e-04
Loss = 7.8668e-02, PNorm = 86.5399, GNorm = 0.5221, lr_0 = 1.0405e-04
Loss = 7.5325e-02, PNorm = 86.5409, GNorm = 0.5533, lr_0 = 1.0398e-04
Loss = 8.3894e-02, PNorm = 86.5431, GNorm = 0.6750, lr_0 = 1.0391e-04
Loss = 7.2917e-02, PNorm = 86.5449, GNorm = 0.5759, lr_0 = 1.0383e-04
Loss = 7.0164e-02, PNorm = 86.5467, GNorm = 0.5884, lr_0 = 1.0376e-04
Loss = 7.5189e-02, PNorm = 86.5488, GNorm = 0.6684, lr_0 = 1.0369e-04
Loss = 7.1012e-02, PNorm = 86.5506, GNorm = 0.7566, lr_0 = 1.0362e-04
Loss = 7.4733e-02, PNorm = 86.5528, GNorm = 0.5614, lr_0 = 1.0355e-04
Loss = 6.9479e-02, PNorm = 86.5550, GNorm = 0.5499, lr_0 = 1.0348e-04
Loss = 7.1241e-02, PNorm = 86.5565, GNorm = 0.6136, lr_0 = 1.0341e-04
Loss = 8.0511e-02, PNorm = 86.5574, GNorm = 0.9129, lr_0 = 1.0334e-04
Loss = 7.5157e-02, PNorm = 86.5589, GNorm = 0.6170, lr_0 = 1.0327e-04
Loss = 7.4136e-02, PNorm = 86.5604, GNorm = 0.6125, lr_0 = 1.0320e-04
Loss = 7.8551e-02, PNorm = 86.5621, GNorm = 0.5669, lr_0 = 1.0312e-04
Loss = 6.4717e-02, PNorm = 86.5641, GNorm = 0.5537, lr_0 = 1.0305e-04
Loss = 7.1894e-02, PNorm = 86.5660, GNorm = 0.5493, lr_0 = 1.0298e-04
Loss = 8.5538e-02, PNorm = 86.5678, GNorm = 0.6651, lr_0 = 1.0291e-04
Loss = 6.5905e-02, PNorm = 86.5694, GNorm = 0.7465, lr_0 = 1.0284e-04
Loss = 7.0298e-02, PNorm = 86.5706, GNorm = 0.4896, lr_0 = 1.0277e-04
Loss = 6.8665e-02, PNorm = 86.5712, GNorm = 0.6056, lr_0 = 1.0270e-04
Loss = 8.0492e-02, PNorm = 86.5733, GNorm = 0.8350, lr_0 = 1.0263e-04
Loss = 7.6911e-02, PNorm = 86.5762, GNorm = 0.8384, lr_0 = 1.0256e-04
Loss = 8.9702e-02, PNorm = 86.5769, GNorm = 0.8502, lr_0 = 1.0249e-04
Loss = 7.1180e-02, PNorm = 86.5782, GNorm = 0.6751, lr_0 = 1.0242e-04
Loss = 7.8356e-02, PNorm = 86.5810, GNorm = 0.6653, lr_0 = 1.0235e-04
Loss = 7.8186e-02, PNorm = 86.5829, GNorm = 0.8044, lr_0 = 1.0228e-04
Loss = 7.1352e-02, PNorm = 86.5831, GNorm = 0.6510, lr_0 = 1.0221e-04
Loss = 7.1200e-02, PNorm = 86.5846, GNorm = 0.4395, lr_0 = 1.0214e-04
Loss = 7.1707e-02, PNorm = 86.5866, GNorm = 0.5514, lr_0 = 1.0207e-04
Loss = 7.6756e-02, PNorm = 86.5890, GNorm = 0.5588, lr_0 = 1.0200e-04
Loss = 7.8403e-02, PNorm = 86.5915, GNorm = 0.5634, lr_0 = 1.0193e-04
Loss = 7.4433e-02, PNorm = 86.5925, GNorm = 0.6680, lr_0 = 1.0186e-04
Loss = 7.5426e-02, PNorm = 86.5931, GNorm = 0.7291, lr_0 = 1.0179e-04
Loss = 6.5804e-02, PNorm = 86.5939, GNorm = 0.4837, lr_0 = 1.0172e-04
Loss = 7.7353e-02, PNorm = 86.5954, GNorm = 0.8538, lr_0 = 1.0165e-04
Loss = 8.8864e-02, PNorm = 86.5976, GNorm = 0.5131, lr_0 = 1.0158e-04
Loss = 7.4623e-02, PNorm = 86.6007, GNorm = 0.6246, lr_0 = 1.0151e-04
Loss = 8.3353e-02, PNorm = 86.6016, GNorm = 0.6867, lr_0 = 1.0144e-04
Loss = 8.5479e-02, PNorm = 86.6029, GNorm = 0.5542, lr_0 = 1.0137e-04
Loss = 7.1305e-02, PNorm = 86.6044, GNorm = 0.6026, lr_0 = 1.0130e-04
Loss = 8.2058e-02, PNorm = 86.6063, GNorm = 0.7411, lr_0 = 1.0123e-04
Loss = 6.3868e-02, PNorm = 86.6077, GNorm = 0.7386, lr_0 = 1.0116e-04
Loss = 6.8700e-02, PNorm = 86.6094, GNorm = 0.6588, lr_0 = 1.0110e-04
Loss = 8.4486e-02, PNorm = 86.6114, GNorm = 0.5933, lr_0 = 1.0103e-04
Loss = 8.3056e-02, PNorm = 86.6127, GNorm = 0.6846, lr_0 = 1.0096e-04
Loss = 7.7955e-02, PNorm = 86.6152, GNorm = 0.6947, lr_0 = 1.0089e-04
Loss = 7.1592e-02, PNorm = 86.6175, GNorm = 0.6214, lr_0 = 1.0082e-04
Loss = 8.0604e-02, PNorm = 86.6172, GNorm = 0.7784, lr_0 = 1.0075e-04
Loss = 7.4501e-02, PNorm = 86.6197, GNorm = 0.5304, lr_0 = 1.0068e-04
Loss = 7.8643e-02, PNorm = 86.6222, GNorm = 0.6689, lr_0 = 1.0061e-04
Loss = 7.7868e-02, PNorm = 86.6225, GNorm = 0.6184, lr_0 = 1.0054e-04
Loss = 7.1792e-02, PNorm = 86.6227, GNorm = 0.5943, lr_0 = 1.0047e-04
Loss = 6.9996e-02, PNorm = 86.6234, GNorm = 0.6033, lr_0 = 1.0041e-04
Loss = 7.5724e-02, PNorm = 86.6238, GNorm = 0.5736, lr_0 = 1.0034e-04
Loss = 7.8968e-02, PNorm = 86.6231, GNorm = 0.6668, lr_0 = 1.0027e-04
Loss = 7.4184e-02, PNorm = 86.6229, GNorm = 0.4874, lr_0 = 1.0020e-04
Loss = 7.7399e-02, PNorm = 86.6257, GNorm = 0.7660, lr_0 = 1.0013e-04
Loss = 7.2781e-02, PNorm = 86.6289, GNorm = 0.5708, lr_0 = 1.0006e-04
Loss = 7.7086e-02, PNorm = 86.6315, GNorm = 0.8446, lr_0 = 1.0000e-04
Validation mae = 0.225650
Model 0 best validation mae = 0.225410 on epoch 25
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.225228
Ensemble test mae = 0.225228
Fold 9
Splitting data with seed 9
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.1, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=500, bias=False)
        (W_h): Linear(in_features=500, out_features=500, bias=False)
        (W_o): Linear(in_features=633, out_features=500, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=1, bias=True)
  )
)
Number of parameters = 1,142,001
Moving model to cuda
Epoch 0
Loss = 1.1295e+00, PNorm = 47.8613, GNorm = 2.5829, lr_0 = 1.0413e-04
Loss = 9.6593e-01, PNorm = 47.8628, GNorm = 2.6506, lr_0 = 1.0788e-04
Loss = 8.7806e-01, PNorm = 47.8651, GNorm = 3.7061, lr_0 = 1.1163e-04
Loss = 9.4109e-01, PNorm = 47.8679, GNorm = 2.4519, lr_0 = 1.1537e-04
Loss = 9.1907e-01, PNorm = 47.8719, GNorm = 2.4469, lr_0 = 1.1913e-04
Loss = 8.5321e-01, PNorm = 47.8760, GNorm = 3.9547, lr_0 = 1.2287e-04
Loss = 8.1370e-01, PNorm = 47.8808, GNorm = 4.6751, lr_0 = 1.2663e-04
Loss = 8.0008e-01, PNorm = 47.8858, GNorm = 9.9614, lr_0 = 1.3038e-04
Loss = 7.0641e-01, PNorm = 47.8917, GNorm = 4.5907, lr_0 = 1.3413e-04
Loss = 6.1303e-01, PNorm = 47.8983, GNorm = 12.7045, lr_0 = 1.3788e-04
Loss = 7.3903e-01, PNorm = 47.9036, GNorm = 8.0738, lr_0 = 1.4163e-04
Loss = 5.9658e-01, PNorm = 47.9097, GNorm = 5.9951, lr_0 = 1.4537e-04
Loss = 5.6066e-01, PNorm = 47.9156, GNorm = 3.0195, lr_0 = 1.4913e-04
Loss = 5.2502e-01, PNorm = 47.9218, GNorm = 18.9650, lr_0 = 1.5288e-04
Loss = 5.0604e-01, PNorm = 47.9275, GNorm = 7.6806, lr_0 = 1.5662e-04
Loss = 4.7614e-01, PNorm = 47.9333, GNorm = 3.3405, lr_0 = 1.6038e-04
Loss = 5.0197e-01, PNorm = 47.9393, GNorm = 13.4573, lr_0 = 1.6412e-04
Loss = 5.4922e-01, PNorm = 47.9425, GNorm = 4.6976, lr_0 = 1.6788e-04
Loss = 5.7227e-01, PNorm = 47.9484, GNorm = 5.2168, lr_0 = 1.7163e-04
Loss = 4.2370e-01, PNorm = 47.9570, GNorm = 8.6077, lr_0 = 1.7538e-04
Loss = 4.3600e-01, PNorm = 47.9621, GNorm = 2.3988, lr_0 = 1.7913e-04
Loss = 4.3308e-01, PNorm = 47.9667, GNorm = 9.2440, lr_0 = 1.8288e-04
Loss = 4.5472e-01, PNorm = 47.9720, GNorm = 11.5454, lr_0 = 1.8662e-04
Loss = 4.5035e-01, PNorm = 47.9772, GNorm = 4.1142, lr_0 = 1.9038e-04
Loss = 4.6032e-01, PNorm = 47.9833, GNorm = 15.6146, lr_0 = 1.9413e-04
Loss = 4.1443e-01, PNorm = 47.9896, GNorm = 3.3575, lr_0 = 1.9788e-04
Loss = 4.1031e-01, PNorm = 47.9947, GNorm = 5.2915, lr_0 = 2.0163e-04
Loss = 3.8757e-01, PNorm = 48.0005, GNorm = 6.2259, lr_0 = 2.0537e-04
Loss = 4.1078e-01, PNorm = 48.0074, GNorm = 3.5250, lr_0 = 2.0913e-04
Loss = 3.5180e-01, PNorm = 48.0114, GNorm = 8.0321, lr_0 = 2.1288e-04
Loss = 3.5491e-01, PNorm = 48.0170, GNorm = 8.2115, lr_0 = 2.1663e-04
Loss = 3.6838e-01, PNorm = 48.0214, GNorm = 1.8468, lr_0 = 2.2038e-04
Loss = 3.4991e-01, PNorm = 48.0259, GNorm = 3.4704, lr_0 = 2.2412e-04
Loss = 3.5184e-01, PNorm = 48.0320, GNorm = 17.2004, lr_0 = 2.2787e-04
Loss = 3.3167e-01, PNorm = 48.0348, GNorm = 3.4993, lr_0 = 2.3163e-04
Loss = 3.6351e-01, PNorm = 48.0390, GNorm = 5.3158, lr_0 = 2.3538e-04
Loss = 3.1110e-01, PNorm = 48.0441, GNorm = 1.8696, lr_0 = 2.3913e-04
Loss = 3.9496e-01, PNorm = 48.0505, GNorm = 1.6138, lr_0 = 2.4288e-04
Loss = 4.2708e-01, PNorm = 48.0557, GNorm = 10.7810, lr_0 = 2.4662e-04
Loss = 3.8865e-01, PNorm = 48.0642, GNorm = 6.7387, lr_0 = 2.5038e-04
Loss = 3.5186e-01, PNorm = 48.0700, GNorm = 7.6893, lr_0 = 2.5413e-04
Loss = 2.8120e-01, PNorm = 48.0734, GNorm = 6.8490, lr_0 = 2.5788e-04
Loss = 3.1125e-01, PNorm = 48.0783, GNorm = 9.2057, lr_0 = 2.6163e-04
Loss = 3.9225e-01, PNorm = 48.0836, GNorm = 5.4301, lr_0 = 2.6537e-04
Loss = 3.7789e-01, PNorm = 48.0901, GNorm = 5.1106, lr_0 = 2.6912e-04
Loss = 3.1547e-01, PNorm = 48.0980, GNorm = 1.8421, lr_0 = 2.7288e-04
Loss = 3.2548e-01, PNorm = 48.1033, GNorm = 8.9070, lr_0 = 2.7663e-04
Loss = 3.9211e-01, PNorm = 48.1097, GNorm = 15.0519, lr_0 = 2.8038e-04
Loss = 4.2903e-01, PNorm = 48.1166, GNorm = 8.0914, lr_0 = 2.8413e-04
Loss = 3.6186e-01, PNorm = 48.1243, GNorm = 2.0298, lr_0 = 2.8787e-04
Loss = 3.3075e-01, PNorm = 48.1311, GNorm = 3.7312, lr_0 = 2.9163e-04
Loss = 3.1554e-01, PNorm = 48.1371, GNorm = 7.2827, lr_0 = 2.9538e-04
Loss = 3.7440e-01, PNorm = 48.1439, GNorm = 2.6154, lr_0 = 2.9913e-04
Loss = 3.3286e-01, PNorm = 48.1513, GNorm = 8.9926, lr_0 = 3.0288e-04
Loss = 3.2121e-01, PNorm = 48.1572, GNorm = 2.0892, lr_0 = 3.0662e-04
Loss = 2.9616e-01, PNorm = 48.1642, GNorm = 8.1464, lr_0 = 3.1037e-04
Loss = 2.9456e-01, PNorm = 48.1703, GNorm = 3.5928, lr_0 = 3.1413e-04
Loss = 2.7989e-01, PNorm = 48.1756, GNorm = 1.5269, lr_0 = 3.1788e-04
Loss = 3.1868e-01, PNorm = 48.1806, GNorm = 13.0487, lr_0 = 3.2163e-04
Loss = 4.1553e-01, PNorm = 48.1856, GNorm = 8.4613, lr_0 = 3.2538e-04
Loss = 3.5352e-01, PNorm = 48.1957, GNorm = 2.9261, lr_0 = 3.2912e-04
Loss = 3.2033e-01, PNorm = 48.2021, GNorm = 1.6831, lr_0 = 3.3288e-04
Loss = 2.9025e-01, PNorm = 48.2094, GNorm = 8.4194, lr_0 = 3.3663e-04
Loss = 3.5504e-01, PNorm = 48.2162, GNorm = 2.3522, lr_0 = 3.4038e-04
Loss = 3.0948e-01, PNorm = 48.2230, GNorm = 1.3557, lr_0 = 3.4413e-04
Loss = 3.1249e-01, PNorm = 48.2322, GNorm = 4.0127, lr_0 = 3.4787e-04
Loss = 2.9892e-01, PNorm = 48.2387, GNorm = 1.5000, lr_0 = 3.5162e-04
Loss = 3.7908e-01, PNorm = 48.2463, GNorm = 2.4227, lr_0 = 3.5538e-04
Loss = 2.8903e-01, PNorm = 48.2565, GNorm = 9.0119, lr_0 = 3.5913e-04
Loss = 3.1282e-01, PNorm = 48.2660, GNorm = 4.9534, lr_0 = 3.6288e-04
Loss = 3.2253e-01, PNorm = 48.2703, GNorm = 7.2566, lr_0 = 3.6662e-04
Loss = 3.1766e-01, PNorm = 48.2782, GNorm = 3.0305, lr_0 = 3.7037e-04
Loss = 2.8528e-01, PNorm = 48.2849, GNorm = 1.3972, lr_0 = 3.7413e-04
Loss = 2.6821e-01, PNorm = 48.2955, GNorm = 3.6636, lr_0 = 3.7788e-04
Loss = 3.4486e-01, PNorm = 48.3017, GNorm = 2.2408, lr_0 = 3.8163e-04
Loss = 2.9258e-01, PNorm = 48.3118, GNorm = 6.3774, lr_0 = 3.8537e-04
Loss = 3.1200e-01, PNorm = 48.3174, GNorm = 1.3948, lr_0 = 3.8912e-04
Loss = 2.7040e-01, PNorm = 48.3266, GNorm = 2.5820, lr_0 = 3.9287e-04
Loss = 3.3295e-01, PNorm = 48.3318, GNorm = 4.8213, lr_0 = 3.9663e-04
Loss = 2.7389e-01, PNorm = 48.3385, GNorm = 6.0991, lr_0 = 4.0038e-04
Loss = 3.1213e-01, PNorm = 48.3409, GNorm = 1.4251, lr_0 = 4.0413e-04
Loss = 2.7845e-01, PNorm = 48.3503, GNorm = 3.0369, lr_0 = 4.0787e-04
Loss = 2.7803e-01, PNorm = 48.3583, GNorm = 4.1015, lr_0 = 4.1162e-04
Loss = 2.8063e-01, PNorm = 48.3661, GNorm = 6.5413, lr_0 = 4.1537e-04
Loss = 2.8575e-01, PNorm = 48.3738, GNorm = 7.0343, lr_0 = 4.1913e-04
Loss = 3.4969e-01, PNorm = 48.3840, GNorm = 11.4501, lr_0 = 4.2288e-04
Loss = 3.1770e-01, PNorm = 48.3942, GNorm = 1.7490, lr_0 = 4.2662e-04
Loss = 3.2485e-01, PNorm = 48.4052, GNorm = 1.8948, lr_0 = 4.3037e-04
Loss = 3.1920e-01, PNorm = 48.4148, GNorm = 9.0666, lr_0 = 4.3412e-04
Loss = 2.9026e-01, PNorm = 48.4262, GNorm = 6.6126, lr_0 = 4.3788e-04
Loss = 2.9843e-01, PNorm = 48.4342, GNorm = 3.6111, lr_0 = 4.4163e-04
Loss = 3.0109e-01, PNorm = 48.4453, GNorm = 1.5317, lr_0 = 4.4538e-04
Loss = 3.6013e-01, PNorm = 48.4517, GNorm = 3.1606, lr_0 = 4.4912e-04
Loss = 2.7732e-01, PNorm = 48.4645, GNorm = 7.6184, lr_0 = 4.5287e-04
Loss = 3.1606e-01, PNorm = 48.4724, GNorm = 5.8740, lr_0 = 4.5662e-04
Loss = 2.5918e-01, PNorm = 48.4787, GNorm = 0.9768, lr_0 = 4.6038e-04
Loss = 2.7413e-01, PNorm = 48.4880, GNorm = 1.6074, lr_0 = 4.6413e-04
Loss = 3.2367e-01, PNorm = 48.4918, GNorm = 1.3269, lr_0 = 4.6787e-04
Loss = 3.0194e-01, PNorm = 48.5037, GNorm = 2.6602, lr_0 = 4.7162e-04
Loss = 3.0870e-01, PNorm = 48.5134, GNorm = 5.6839, lr_0 = 4.7537e-04
Loss = 2.9817e-01, PNorm = 48.5263, GNorm = 2.0682, lr_0 = 4.7913e-04
Loss = 2.7193e-01, PNorm = 48.5374, GNorm = 5.4668, lr_0 = 4.8288e-04
Loss = 3.1323e-01, PNorm = 48.5449, GNorm = 1.4736, lr_0 = 4.8663e-04
Loss = 2.8120e-01, PNorm = 48.5612, GNorm = 4.8154, lr_0 = 4.9038e-04
Loss = 2.7842e-01, PNorm = 48.5710, GNorm = 5.6396, lr_0 = 4.9412e-04
Loss = 2.7289e-01, PNorm = 48.5764, GNorm = 5.0409, lr_0 = 4.9788e-04
Loss = 2.6874e-01, PNorm = 48.5871, GNorm = 6.8862, lr_0 = 5.0163e-04
Loss = 3.0482e-01, PNorm = 48.5966, GNorm = 1.7666, lr_0 = 5.0538e-04
Loss = 2.6020e-01, PNorm = 48.6056, GNorm = 1.3445, lr_0 = 5.0913e-04
Loss = 3.0019e-01, PNorm = 48.6163, GNorm = 1.3619, lr_0 = 5.1287e-04
Loss = 2.6896e-01, PNorm = 48.6299, GNorm = 1.3497, lr_0 = 5.1663e-04
Loss = 2.5596e-01, PNorm = 48.6368, GNorm = 1.9087, lr_0 = 5.2038e-04
Loss = 3.0069e-01, PNorm = 48.6466, GNorm = 1.0146, lr_0 = 5.2413e-04
Loss = 2.7091e-01, PNorm = 48.6556, GNorm = 5.7060, lr_0 = 5.2788e-04
Loss = 3.2306e-01, PNorm = 48.6667, GNorm = 11.7568, lr_0 = 5.3162e-04
Loss = 3.5551e-01, PNorm = 48.6780, GNorm = 8.8493, lr_0 = 5.3538e-04
Loss = 2.7545e-01, PNorm = 48.6961, GNorm = 0.8843, lr_0 = 5.3912e-04
Loss = 3.1609e-01, PNorm = 48.7120, GNorm = 2.1406, lr_0 = 5.4288e-04
Loss = 2.9166e-01, PNorm = 48.7274, GNorm = 4.7754, lr_0 = 5.4663e-04
Loss = 2.8610e-01, PNorm = 48.7429, GNorm = 0.9669, lr_0 = 5.5038e-04
Validation mae = 0.346596
Epoch 1
Loss = 2.7455e-01, PNorm = 48.7495, GNorm = 3.1761, lr_0 = 5.5413e-04
Loss = 3.3023e-01, PNorm = 48.7609, GNorm = 7.4902, lr_0 = 5.5787e-04
Loss = 2.8795e-01, PNorm = 48.7754, GNorm = 1.3567, lr_0 = 5.6163e-04
Loss = 2.8964e-01, PNorm = 48.7830, GNorm = 3.3707, lr_0 = 5.6538e-04
Loss = 2.7953e-01, PNorm = 48.7923, GNorm = 1.7357, lr_0 = 5.6913e-04
Loss = 2.3900e-01, PNorm = 48.8057, GNorm = 3.7493, lr_0 = 5.7288e-04
Loss = 2.7755e-01, PNorm = 48.8154, GNorm = 2.6731, lr_0 = 5.7662e-04
Loss = 2.7239e-01, PNorm = 48.8256, GNorm = 2.1574, lr_0 = 5.8038e-04
Loss = 2.5537e-01, PNorm = 48.8348, GNorm = 3.2152, lr_0 = 5.8413e-04
Loss = 2.8405e-01, PNorm = 48.8463, GNorm = 3.4300, lr_0 = 5.8788e-04
Loss = 2.9054e-01, PNorm = 48.8633, GNorm = 2.0104, lr_0 = 5.9163e-04
Loss = 2.5878e-01, PNorm = 48.8788, GNorm = 1.2967, lr_0 = 5.9538e-04
Loss = 2.3776e-01, PNorm = 48.8980, GNorm = 8.9326, lr_0 = 5.9913e-04
Loss = 3.1904e-01, PNorm = 48.9124, GNorm = 7.9532, lr_0 = 6.0288e-04
Loss = 2.5296e-01, PNorm = 48.9307, GNorm = 4.2806, lr_0 = 6.0663e-04
Loss = 2.7338e-01, PNorm = 48.9444, GNorm = 2.6692, lr_0 = 6.1038e-04
Loss = 2.9715e-01, PNorm = 48.9628, GNorm = 2.7682, lr_0 = 6.1413e-04
Loss = 2.6352e-01, PNorm = 48.9781, GNorm = 2.1782, lr_0 = 6.1788e-04
Loss = 2.7328e-01, PNorm = 48.9891, GNorm = 1.4661, lr_0 = 6.2163e-04
Loss = 2.9281e-01, PNorm = 49.0020, GNorm = 0.9198, lr_0 = 6.2538e-04
Loss = 2.7640e-01, PNorm = 49.0151, GNorm = 0.8153, lr_0 = 6.2913e-04
Loss = 2.5475e-01, PNorm = 49.0274, GNorm = 0.8340, lr_0 = 6.3288e-04
Loss = 2.4845e-01, PNorm = 49.0428, GNorm = 7.4554, lr_0 = 6.3663e-04
Loss = 3.0224e-01, PNorm = 49.0604, GNorm = 4.1954, lr_0 = 6.4038e-04
Loss = 3.1155e-01, PNorm = 49.0746, GNorm = 2.1384, lr_0 = 6.4413e-04
Loss = 2.6947e-01, PNorm = 49.0929, GNorm = 4.3256, lr_0 = 6.4788e-04
Loss = 2.6818e-01, PNorm = 49.1133, GNorm = 1.6990, lr_0 = 6.5163e-04
Loss = 2.7557e-01, PNorm = 49.1264, GNorm = 1.4524, lr_0 = 6.5538e-04
Loss = 3.4627e-01, PNorm = 49.1397, GNorm = 9.1219, lr_0 = 6.5913e-04
Loss = 3.8868e-01, PNorm = 49.1602, GNorm = 1.3275, lr_0 = 6.6288e-04
Loss = 3.2384e-01, PNorm = 49.1948, GNorm = 1.1644, lr_0 = 6.6663e-04
Loss = 2.6509e-01, PNorm = 49.2118, GNorm = 3.2137, lr_0 = 6.7038e-04
Loss = 2.7206e-01, PNorm = 49.2276, GNorm = 3.0268, lr_0 = 6.7413e-04
Loss = 2.5260e-01, PNorm = 49.2369, GNorm = 1.6757, lr_0 = 6.7788e-04
Loss = 2.3301e-01, PNorm = 49.2509, GNorm = 1.5583, lr_0 = 6.8163e-04
Loss = 2.6468e-01, PNorm = 49.2601, GNorm = 3.0598, lr_0 = 6.8538e-04
Loss = 2.7980e-01, PNorm = 49.2715, GNorm = 1.7912, lr_0 = 6.8913e-04
Loss = 2.6719e-01, PNorm = 49.2900, GNorm = 2.0867, lr_0 = 6.9288e-04
Loss = 3.3337e-01, PNorm = 49.2998, GNorm = 2.8737, lr_0 = 6.9663e-04
Loss = 3.1025e-01, PNorm = 49.3104, GNorm = 5.7837, lr_0 = 7.0038e-04
Loss = 3.0258e-01, PNorm = 49.3309, GNorm = 5.0331, lr_0 = 7.0413e-04
Loss = 2.9286e-01, PNorm = 49.3467, GNorm = 2.3176, lr_0 = 7.0788e-04
Loss = 2.5211e-01, PNorm = 49.3647, GNorm = 1.9627, lr_0 = 7.1163e-04
Loss = 2.6301e-01, PNorm = 49.3707, GNorm = 1.2244, lr_0 = 7.1538e-04
Loss = 2.8279e-01, PNorm = 49.3864, GNorm = 2.3902, lr_0 = 7.1913e-04
Loss = 2.8016e-01, PNorm = 49.4013, GNorm = 3.8816, lr_0 = 7.2288e-04
Loss = 2.5248e-01, PNorm = 49.4211, GNorm = 3.5953, lr_0 = 7.2663e-04
Loss = 2.7848e-01, PNorm = 49.4332, GNorm = 0.7550, lr_0 = 7.3038e-04
Loss = 3.2643e-01, PNorm = 49.4459, GNorm = 5.0437, lr_0 = 7.3413e-04
Loss = 2.8828e-01, PNorm = 49.4724, GNorm = 3.2763, lr_0 = 7.3788e-04
Loss = 2.7782e-01, PNorm = 49.4890, GNorm = 2.9608, lr_0 = 7.4163e-04
Loss = 2.7700e-01, PNorm = 49.5066, GNorm = 3.9780, lr_0 = 7.4538e-04
Loss = 2.6156e-01, PNorm = 49.5210, GNorm = 0.7662, lr_0 = 7.4913e-04
Loss = 2.3125e-01, PNorm = 49.5362, GNorm = 1.2319, lr_0 = 7.5288e-04
Loss = 2.7174e-01, PNorm = 49.5506, GNorm = 0.8641, lr_0 = 7.5663e-04
Loss = 2.7799e-01, PNorm = 49.5709, GNorm = 1.5594, lr_0 = 7.6038e-04
Loss = 2.4252e-01, PNorm = 49.5918, GNorm = 2.8866, lr_0 = 7.6413e-04
Loss = 2.6001e-01, PNorm = 49.6087, GNorm = 2.7744, lr_0 = 7.6788e-04
Loss = 2.7521e-01, PNorm = 49.6238, GNorm = 0.9195, lr_0 = 7.7163e-04
Loss = 2.2988e-01, PNorm = 49.6443, GNorm = 2.3134, lr_0 = 7.7538e-04
Loss = 3.0138e-01, PNorm = 49.6613, GNorm = 2.7426, lr_0 = 7.7913e-04
Loss = 2.4269e-01, PNorm = 49.6858, GNorm = 1.5409, lr_0 = 7.8288e-04
Loss = 2.8293e-01, PNorm = 49.7037, GNorm = 0.8932, lr_0 = 7.8663e-04
Loss = 2.8008e-01, PNorm = 49.7288, GNorm = 2.1172, lr_0 = 7.9038e-04
Loss = 2.3629e-01, PNorm = 49.7502, GNorm = 2.7835, lr_0 = 7.9413e-04
Loss = 2.6073e-01, PNorm = 49.7706, GNorm = 1.2301, lr_0 = 7.9788e-04
Loss = 2.3540e-01, PNorm = 49.7857, GNorm = 0.8027, lr_0 = 8.0163e-04
Loss = 2.5421e-01, PNorm = 49.8025, GNorm = 4.6898, lr_0 = 8.0538e-04
Loss = 2.5909e-01, PNorm = 49.8184, GNorm = 1.6648, lr_0 = 8.0913e-04
Loss = 2.1896e-01, PNorm = 49.8329, GNorm = 0.7040, lr_0 = 8.1288e-04
Loss = 2.6530e-01, PNorm = 49.8446, GNorm = 1.1189, lr_0 = 8.1663e-04
Loss = 2.3541e-01, PNorm = 49.8663, GNorm = 4.3869, lr_0 = 8.2038e-04
Loss = 2.3854e-01, PNorm = 49.8781, GNorm = 1.3741, lr_0 = 8.2413e-04
Loss = 2.4266e-01, PNorm = 49.8856, GNorm = 2.5055, lr_0 = 8.2788e-04
Loss = 2.4721e-01, PNorm = 49.9072, GNorm = 6.4671, lr_0 = 8.3163e-04
Loss = 2.4679e-01, PNorm = 49.9292, GNorm = 2.2284, lr_0 = 8.3538e-04
Loss = 2.5463e-01, PNorm = 49.9613, GNorm = 1.0053, lr_0 = 8.3913e-04
Loss = 2.5246e-01, PNorm = 49.9864, GNorm = 2.0035, lr_0 = 8.4288e-04
Loss = 2.8077e-01, PNorm = 50.0101, GNorm = 1.6170, lr_0 = 8.4663e-04
Loss = 2.4749e-01, PNorm = 50.0403, GNorm = 4.0859, lr_0 = 8.5038e-04
Loss = 2.3199e-01, PNorm = 50.0645, GNorm = 0.7946, lr_0 = 8.5413e-04
Loss = 2.4342e-01, PNorm = 50.0839, GNorm = 4.2732, lr_0 = 8.5788e-04
Loss = 2.6734e-01, PNorm = 50.1131, GNorm = 2.2630, lr_0 = 8.6163e-04
Loss = 2.4821e-01, PNorm = 50.1373, GNorm = 1.8681, lr_0 = 8.6538e-04
Loss = 2.3920e-01, PNorm = 50.1607, GNorm = 2.2369, lr_0 = 8.6913e-04
Loss = 2.3811e-01, PNorm = 50.1772, GNorm = 3.4385, lr_0 = 8.7288e-04
Loss = 2.3723e-01, PNorm = 50.1986, GNorm = 2.5297, lr_0 = 8.7663e-04
Loss = 2.7233e-01, PNorm = 50.2134, GNorm = 2.0757, lr_0 = 8.8038e-04
Loss = 3.1053e-01, PNorm = 50.2413, GNorm = 1.9878, lr_0 = 8.8413e-04
Loss = 2.9284e-01, PNorm = 50.2612, GNorm = 1.8544, lr_0 = 8.8788e-04
Loss = 2.7964e-01, PNorm = 50.2867, GNorm = 3.4274, lr_0 = 8.9163e-04
Loss = 2.8022e-01, PNorm = 50.3001, GNorm = 2.1219, lr_0 = 8.9538e-04
Loss = 2.5110e-01, PNorm = 50.3212, GNorm = 1.8135, lr_0 = 8.9913e-04
Loss = 2.1985e-01, PNorm = 50.3393, GNorm = 2.8220, lr_0 = 9.0288e-04
Loss = 2.4716e-01, PNorm = 50.3622, GNorm = 4.8529, lr_0 = 9.0663e-04
Loss = 2.7600e-01, PNorm = 50.3873, GNorm = 1.6863, lr_0 = 9.1038e-04
Loss = 2.5477e-01, PNorm = 50.4091, GNorm = 1.6305, lr_0 = 9.1413e-04
Loss = 2.5280e-01, PNorm = 50.4296, GNorm = 0.9944, lr_0 = 9.1788e-04
Loss = 2.6313e-01, PNorm = 50.4460, GNorm = 1.1823, lr_0 = 9.2163e-04
Loss = 2.6216e-01, PNorm = 50.4690, GNorm = 1.2829, lr_0 = 9.2538e-04
Loss = 2.0803e-01, PNorm = 50.4896, GNorm = 1.0212, lr_0 = 9.2913e-04
Loss = 2.5567e-01, PNorm = 50.5083, GNorm = 5.6635, lr_0 = 9.3288e-04
Loss = 2.5698e-01, PNorm = 50.5312, GNorm = 0.8020, lr_0 = 9.3663e-04
Loss = 2.5431e-01, PNorm = 50.5548, GNorm = 2.4120, lr_0 = 9.4038e-04
Loss = 2.6982e-01, PNorm = 50.5851, GNorm = 1.9247, lr_0 = 9.4413e-04
Loss = 2.4821e-01, PNorm = 50.5957, GNorm = 2.2054, lr_0 = 9.4788e-04
Loss = 2.4660e-01, PNorm = 50.6217, GNorm = 1.5350, lr_0 = 9.5163e-04
Loss = 2.6767e-01, PNorm = 50.6466, GNorm = 0.8613, lr_0 = 9.5538e-04
Loss = 2.6048e-01, PNorm = 50.6776, GNorm = 1.6964, lr_0 = 9.5913e-04
Loss = 2.4832e-01, PNorm = 50.7046, GNorm = 1.9685, lr_0 = 9.6288e-04
Loss = 2.2408e-01, PNorm = 50.7230, GNorm = 2.1190, lr_0 = 9.6663e-04
Loss = 2.4466e-01, PNorm = 50.7496, GNorm = 3.4698, lr_0 = 9.7038e-04
Loss = 2.6477e-01, PNorm = 50.7686, GNorm = 4.3776, lr_0 = 9.7413e-04
Loss = 2.7498e-01, PNorm = 50.8003, GNorm = 2.6844, lr_0 = 9.7788e-04
Loss = 2.1222e-01, PNorm = 50.8357, GNorm = 2.4097, lr_0 = 9.8163e-04
Loss = 2.6051e-01, PNorm = 50.8665, GNorm = 2.0292, lr_0 = 9.8537e-04
Loss = 2.4324e-01, PNorm = 50.8921, GNorm = 3.9555, lr_0 = 9.8912e-04
Loss = 2.5589e-01, PNorm = 50.9118, GNorm = 4.6075, lr_0 = 9.9288e-04
Loss = 2.8617e-01, PNorm = 50.9414, GNorm = 2.5730, lr_0 = 9.9663e-04
Loss = 2.3711e-01, PNorm = 50.9781, GNorm = 1.8893, lr_0 = 9.9993e-04
Validation mae = 0.326401
Epoch 2
Loss = 2.8534e-01, PNorm = 51.0108, GNorm = 4.1663, lr_0 = 9.9925e-04
Loss = 2.7605e-01, PNorm = 51.0444, GNorm = 0.8804, lr_0 = 9.9856e-04
Loss = 2.4975e-01, PNorm = 51.0849, GNorm = 0.9599, lr_0 = 9.9788e-04
Loss = 2.0335e-01, PNorm = 51.1107, GNorm = 0.9848, lr_0 = 9.9719e-04
Loss = 2.4184e-01, PNorm = 51.1331, GNorm = 1.2784, lr_0 = 9.9651e-04
Loss = 2.0347e-01, PNorm = 51.1603, GNorm = 0.9014, lr_0 = 9.9583e-04
Loss = 2.3015e-01, PNorm = 51.1834, GNorm = 2.4214, lr_0 = 9.9515e-04
Loss = 2.2303e-01, PNorm = 51.2054, GNorm = 1.1866, lr_0 = 9.9446e-04
Loss = 2.5331e-01, PNorm = 51.2279, GNorm = 2.9869, lr_0 = 9.9378e-04
Loss = 2.4224e-01, PNorm = 51.2527, GNorm = 2.1792, lr_0 = 9.9310e-04
Loss = 2.3821e-01, PNorm = 51.2825, GNorm = 2.7089, lr_0 = 9.9242e-04
Loss = 2.5290e-01, PNorm = 51.3112, GNorm = 2.6950, lr_0 = 9.9174e-04
Loss = 2.1361e-01, PNorm = 51.3403, GNorm = 0.8351, lr_0 = 9.9106e-04
Loss = 2.5226e-01, PNorm = 51.3630, GNorm = 1.2028, lr_0 = 9.9038e-04
Loss = 2.7912e-01, PNorm = 51.3923, GNorm = 1.9470, lr_0 = 9.8971e-04
Loss = 2.5709e-01, PNorm = 51.4298, GNorm = 0.8782, lr_0 = 9.8903e-04
Loss = 2.5148e-01, PNorm = 51.4504, GNorm = 0.9889, lr_0 = 9.8835e-04
Loss = 2.0670e-01, PNorm = 51.4895, GNorm = 1.1915, lr_0 = 9.8767e-04
Loss = 2.2367e-01, PNorm = 51.5199, GNorm = 2.4402, lr_0 = 9.8700e-04
Loss = 2.3293e-01, PNorm = 51.5486, GNorm = 1.0050, lr_0 = 9.8632e-04
Loss = 2.1175e-01, PNorm = 51.5809, GNorm = 1.0891, lr_0 = 9.8564e-04
Loss = 2.3413e-01, PNorm = 51.5929, GNorm = 0.7263, lr_0 = 9.8497e-04
Loss = 2.4334e-01, PNorm = 51.6179, GNorm = 1.2317, lr_0 = 9.8429e-04
Loss = 2.1454e-01, PNorm = 51.6480, GNorm = 0.9899, lr_0 = 9.8362e-04
Loss = 2.1027e-01, PNorm = 51.6736, GNorm = 0.8031, lr_0 = 9.8295e-04
Loss = 2.2419e-01, PNorm = 51.6922, GNorm = 1.0856, lr_0 = 9.8227e-04
Loss = 1.8424e-01, PNorm = 51.7137, GNorm = 1.3166, lr_0 = 9.8160e-04
Loss = 2.2929e-01, PNorm = 51.7306, GNorm = 0.6772, lr_0 = 9.8093e-04
Loss = 2.4301e-01, PNorm = 51.7457, GNorm = 0.7517, lr_0 = 9.8026e-04
Loss = 2.3306e-01, PNorm = 51.7650, GNorm = 2.2492, lr_0 = 9.7958e-04
Loss = 2.1451e-01, PNorm = 51.7927, GNorm = 1.3381, lr_0 = 9.7891e-04
Loss = 2.2383e-01, PNorm = 51.8245, GNorm = 5.4968, lr_0 = 9.7824e-04
Loss = 2.2421e-01, PNorm = 51.8459, GNorm = 0.7803, lr_0 = 9.7757e-04
Loss = 2.6690e-01, PNorm = 51.8718, GNorm = 2.4869, lr_0 = 9.7690e-04
Loss = 2.4378e-01, PNorm = 51.9034, GNorm = 1.2985, lr_0 = 9.7623e-04
Loss = 2.3080e-01, PNorm = 51.9341, GNorm = 1.6804, lr_0 = 9.7556e-04
Loss = 2.2504e-01, PNorm = 51.9499, GNorm = 0.5777, lr_0 = 9.7490e-04
Loss = 2.2251e-01, PNorm = 51.9714, GNorm = 0.8761, lr_0 = 9.7423e-04
Loss = 2.3636e-01, PNorm = 52.0035, GNorm = 1.1889, lr_0 = 9.7356e-04
Loss = 2.7997e-01, PNorm = 52.0259, GNorm = 2.8670, lr_0 = 9.7289e-04
Loss = 2.1685e-01, PNorm = 52.0582, GNorm = 1.6195, lr_0 = 9.7223e-04
Loss = 2.6725e-01, PNorm = 52.0768, GNorm = 2.6347, lr_0 = 9.7156e-04
Loss = 2.2826e-01, PNorm = 52.1054, GNorm = 1.3539, lr_0 = 9.7090e-04
Loss = 2.2012e-01, PNorm = 52.1331, GNorm = 1.4074, lr_0 = 9.7023e-04
Loss = 2.3960e-01, PNorm = 52.1608, GNorm = 0.8501, lr_0 = 9.6957e-04
Loss = 2.2210e-01, PNorm = 52.1874, GNorm = 1.7045, lr_0 = 9.6890e-04
Loss = 2.2018e-01, PNorm = 52.1984, GNorm = 0.9930, lr_0 = 9.6824e-04
Loss = 2.7376e-01, PNorm = 52.2240, GNorm = 2.7594, lr_0 = 9.6757e-04
Loss = 2.5691e-01, PNorm = 52.2623, GNorm = 3.2646, lr_0 = 9.6691e-04
Loss = 2.3630e-01, PNorm = 52.2834, GNorm = 0.7785, lr_0 = 9.6625e-04
Loss = 2.3458e-01, PNorm = 52.3195, GNorm = 1.6400, lr_0 = 9.6559e-04
Loss = 2.3833e-01, PNorm = 52.3416, GNorm = 3.5373, lr_0 = 9.6493e-04
Loss = 2.2065e-01, PNorm = 52.3742, GNorm = 1.2823, lr_0 = 9.6427e-04
Loss = 2.3970e-01, PNorm = 52.4109, GNorm = 1.8441, lr_0 = 9.6360e-04
Loss = 2.4262e-01, PNorm = 52.4465, GNorm = 3.2939, lr_0 = 9.6294e-04
Loss = 2.1966e-01, PNorm = 52.4872, GNorm = 1.5216, lr_0 = 9.6228e-04
Loss = 2.4040e-01, PNorm = 52.5171, GNorm = 2.1949, lr_0 = 9.6163e-04
Loss = 2.3711e-01, PNorm = 52.5385, GNorm = 1.2805, lr_0 = 9.6097e-04
Loss = 2.3769e-01, PNorm = 52.5653, GNorm = 1.5011, lr_0 = 9.6031e-04
Loss = 2.1769e-01, PNorm = 52.6019, GNorm = 0.6572, lr_0 = 9.5965e-04
Loss = 2.1580e-01, PNorm = 52.6174, GNorm = 0.9859, lr_0 = 9.5899e-04
Loss = 2.1032e-01, PNorm = 52.6383, GNorm = 0.6487, lr_0 = 9.5834e-04
Loss = 2.4305e-01, PNorm = 52.6583, GNorm = 0.6739, lr_0 = 9.5768e-04
Loss = 1.9793e-01, PNorm = 52.6789, GNorm = 0.7101, lr_0 = 9.5702e-04
Loss = 2.0498e-01, PNorm = 52.6990, GNorm = 0.9728, lr_0 = 9.5637e-04
Loss = 2.2487e-01, PNorm = 52.7167, GNorm = 1.4621, lr_0 = 9.5571e-04
Loss = 2.1370e-01, PNorm = 52.7310, GNorm = 0.7968, lr_0 = 9.5506e-04
Loss = 2.0229e-01, PNorm = 52.7524, GNorm = 0.7971, lr_0 = 9.5440e-04
Loss = 2.3391e-01, PNorm = 52.7802, GNorm = 2.8849, lr_0 = 9.5375e-04
Loss = 2.4923e-01, PNorm = 52.8029, GNorm = 1.6228, lr_0 = 9.5310e-04
Loss = 2.0453e-01, PNorm = 52.8287, GNorm = 0.9980, lr_0 = 9.5244e-04
Loss = 2.5401e-01, PNorm = 52.8508, GNorm = 1.3267, lr_0 = 9.5179e-04
Loss = 2.7761e-01, PNorm = 52.8757, GNorm = 1.7009, lr_0 = 9.5114e-04
Loss = 1.9782e-01, PNorm = 52.9109, GNorm = 0.7752, lr_0 = 9.5049e-04
Loss = 2.0399e-01, PNorm = 52.9404, GNorm = 2.1090, lr_0 = 9.4984e-04
Loss = 2.3449e-01, PNorm = 52.9648, GNorm = 2.7268, lr_0 = 9.4919e-04
Loss = 2.5102e-01, PNorm = 52.9903, GNorm = 1.4559, lr_0 = 9.4854e-04
Loss = 2.0658e-01, PNorm = 53.0180, GNorm = 0.6341, lr_0 = 9.4789e-04
Loss = 2.2251e-01, PNorm = 53.0564, GNorm = 1.1493, lr_0 = 9.4724e-04
Loss = 2.3022e-01, PNorm = 53.0822, GNorm = 0.5514, lr_0 = 9.4659e-04
Loss = 1.9780e-01, PNorm = 53.1089, GNorm = 0.9318, lr_0 = 9.4594e-04
Loss = 2.1000e-01, PNorm = 53.1299, GNorm = 1.7504, lr_0 = 9.4529e-04
Loss = 2.2359e-01, PNorm = 53.1530, GNorm = 0.6782, lr_0 = 9.4464e-04
Loss = 2.2347e-01, PNorm = 53.1785, GNorm = 2.4491, lr_0 = 9.4400e-04
Loss = 2.3176e-01, PNorm = 53.2002, GNorm = 1.3830, lr_0 = 9.4335e-04
Loss = 2.4428e-01, PNorm = 53.2310, GNorm = 1.5365, lr_0 = 9.4270e-04
Loss = 2.1040e-01, PNorm = 53.2610, GNorm = 1.6597, lr_0 = 9.4206e-04
Loss = 2.3119e-01, PNorm = 53.2847, GNorm = 2.5804, lr_0 = 9.4141e-04
Loss = 2.2164e-01, PNorm = 53.3110, GNorm = 1.1685, lr_0 = 9.4077e-04
Loss = 2.3513e-01, PNorm = 53.3481, GNorm = 0.9855, lr_0 = 9.4012e-04
Loss = 2.8148e-01, PNorm = 53.3761, GNorm = 3.9193, lr_0 = 9.3948e-04
Loss = 2.7356e-01, PNorm = 53.4095, GNorm = 1.3570, lr_0 = 9.3884e-04
Loss = 2.4060e-01, PNorm = 53.4457, GNorm = 1.3945, lr_0 = 9.3819e-04
Loss = 1.9889e-01, PNorm = 53.4668, GNorm = 0.7675, lr_0 = 9.3755e-04
Loss = 2.0785e-01, PNorm = 53.4882, GNorm = 0.5998, lr_0 = 9.3691e-04
Loss = 1.9687e-01, PNorm = 53.5075, GNorm = 0.8290, lr_0 = 9.3627e-04
Loss = 2.3258e-01, PNorm = 53.5198, GNorm = 2.1253, lr_0 = 9.3562e-04
Loss = 2.1520e-01, PNorm = 53.5419, GNorm = 1.7038, lr_0 = 9.3498e-04
Loss = 2.2297e-01, PNorm = 53.5676, GNorm = 1.1068, lr_0 = 9.3434e-04
Loss = 2.2498e-01, PNorm = 53.5949, GNorm = 1.0833, lr_0 = 9.3370e-04
Loss = 2.3044e-01, PNorm = 53.6288, GNorm = 1.1572, lr_0 = 9.3306e-04
Loss = 2.1913e-01, PNorm = 53.6490, GNorm = 0.9813, lr_0 = 9.3242e-04
Loss = 2.1563e-01, PNorm = 53.6834, GNorm = 0.7249, lr_0 = 9.3178e-04
Loss = 2.3479e-01, PNorm = 53.7136, GNorm = 1.5255, lr_0 = 9.3115e-04
Loss = 2.0987e-01, PNorm = 53.7393, GNorm = 0.9455, lr_0 = 9.3051e-04
Loss = 2.0457e-01, PNorm = 53.7648, GNorm = 1.3469, lr_0 = 9.2987e-04
Loss = 2.2848e-01, PNorm = 53.7896, GNorm = 2.2468, lr_0 = 9.2923e-04
Loss = 2.2110e-01, PNorm = 53.8209, GNorm = 0.6599, lr_0 = 9.2860e-04
Loss = 2.2414e-01, PNorm = 53.8441, GNorm = 1.8560, lr_0 = 9.2796e-04
Loss = 2.2308e-01, PNorm = 53.8730, GNorm = 0.6500, lr_0 = 9.2733e-04
Loss = 2.3183e-01, PNorm = 53.8929, GNorm = 1.5083, lr_0 = 9.2669e-04
Loss = 2.4629e-01, PNorm = 53.9154, GNorm = 0.9275, lr_0 = 9.2606e-04
Loss = 2.1161e-01, PNorm = 53.9377, GNorm = 1.0882, lr_0 = 9.2542e-04
Loss = 1.9682e-01, PNorm = 53.9644, GNorm = 1.0041, lr_0 = 9.2479e-04
Loss = 2.0193e-01, PNorm = 53.9891, GNorm = 0.9690, lr_0 = 9.2415e-04
Loss = 2.1212e-01, PNorm = 54.0142, GNorm = 1.4906, lr_0 = 9.2352e-04
Loss = 2.0295e-01, PNorm = 54.0368, GNorm = 2.0253, lr_0 = 9.2289e-04
Loss = 1.9445e-01, PNorm = 54.0659, GNorm = 1.3427, lr_0 = 9.2226e-04
Loss = 2.1025e-01, PNorm = 54.0897, GNorm = 1.4748, lr_0 = 9.2162e-04
Loss = 2.2368e-01, PNorm = 54.1162, GNorm = 2.0015, lr_0 = 9.2099e-04
Validation mae = 0.313415
Epoch 3
Loss = 2.3311e-01, PNorm = 54.1607, GNorm = 1.3782, lr_0 = 9.2036e-04
Loss = 1.9199e-01, PNorm = 54.1894, GNorm = 0.5854, lr_0 = 9.1973e-04
Loss = 2.0907e-01, PNorm = 54.2249, GNorm = 3.0742, lr_0 = 9.1910e-04
Loss = 1.9414e-01, PNorm = 54.2578, GNorm = 1.2218, lr_0 = 9.1847e-04
Loss = 2.0320e-01, PNorm = 54.2806, GNorm = 1.0815, lr_0 = 9.1784e-04
Loss = 1.8594e-01, PNorm = 54.3067, GNorm = 1.3157, lr_0 = 9.1721e-04
Loss = 1.9328e-01, PNorm = 54.3283, GNorm = 0.8787, lr_0 = 9.1658e-04
Loss = 2.3338e-01, PNorm = 54.3546, GNorm = 2.1276, lr_0 = 9.1596e-04
Loss = 1.8538e-01, PNorm = 54.3796, GNorm = 1.0418, lr_0 = 9.1533e-04
Loss = 1.7076e-01, PNorm = 54.4063, GNorm = 1.0984, lr_0 = 9.1470e-04
Loss = 2.3990e-01, PNorm = 54.4375, GNorm = 1.2001, lr_0 = 9.1408e-04
Loss = 1.9690e-01, PNorm = 54.4690, GNorm = 1.8170, lr_0 = 9.1345e-04
Loss = 2.1224e-01, PNorm = 54.4958, GNorm = 0.8875, lr_0 = 9.1282e-04
Loss = 2.2950e-01, PNorm = 54.5129, GNorm = 1.0648, lr_0 = 9.1220e-04
Loss = 1.7533e-01, PNorm = 54.5331, GNorm = 1.6033, lr_0 = 9.1157e-04
Loss = 1.9168e-01, PNorm = 54.5610, GNorm = 0.9400, lr_0 = 9.1095e-04
Loss = 2.2164e-01, PNorm = 54.5849, GNorm = 1.7939, lr_0 = 9.1032e-04
Loss = 2.4807e-01, PNorm = 54.6232, GNorm = 0.8091, lr_0 = 9.0970e-04
Loss = 2.1734e-01, PNorm = 54.6605, GNorm = 1.9783, lr_0 = 9.0908e-04
Loss = 2.2852e-01, PNorm = 54.6995, GNorm = 1.1875, lr_0 = 9.0846e-04
Loss = 2.0914e-01, PNorm = 54.7266, GNorm = 1.1596, lr_0 = 9.0783e-04
Loss = 1.8978e-01, PNorm = 54.7484, GNorm = 0.7746, lr_0 = 9.0721e-04
Loss = 1.8992e-01, PNorm = 54.7723, GNorm = 0.8791, lr_0 = 9.0659e-04
Loss = 1.9241e-01, PNorm = 54.7942, GNorm = 0.7317, lr_0 = 9.0597e-04
Loss = 2.0093e-01, PNorm = 54.8140, GNorm = 1.4899, lr_0 = 9.0535e-04
Loss = 1.8368e-01, PNorm = 54.8364, GNorm = 3.1832, lr_0 = 9.0473e-04
Loss = 2.1820e-01, PNorm = 54.8599, GNorm = 1.4935, lr_0 = 9.0411e-04
Loss = 2.1735e-01, PNorm = 54.8915, GNorm = 1.6159, lr_0 = 9.0349e-04
Loss = 1.9303e-01, PNorm = 54.9234, GNorm = 1.6713, lr_0 = 9.0287e-04
Loss = 2.3476e-01, PNorm = 54.9473, GNorm = 1.7893, lr_0 = 9.0225e-04
Loss = 2.0557e-01, PNorm = 54.9893, GNorm = 1.1839, lr_0 = 9.0163e-04
Loss = 1.9617e-01, PNorm = 55.0146, GNorm = 0.9349, lr_0 = 9.0102e-04
Loss = 1.9085e-01, PNorm = 55.0395, GNorm = 0.8328, lr_0 = 9.0040e-04
Loss = 1.9940e-01, PNorm = 55.0546, GNorm = 1.1467, lr_0 = 8.9978e-04
Loss = 2.2246e-01, PNorm = 55.0765, GNorm = 1.2558, lr_0 = 8.9916e-04
Loss = 2.0120e-01, PNorm = 55.1040, GNorm = 0.6320, lr_0 = 8.9855e-04
Loss = 1.8571e-01, PNorm = 55.1306, GNorm = 0.6193, lr_0 = 8.9793e-04
Loss = 1.8066e-01, PNorm = 55.1537, GNorm = 0.7203, lr_0 = 8.9732e-04
Loss = 1.9866e-01, PNorm = 55.1803, GNorm = 1.0279, lr_0 = 8.9670e-04
Loss = 1.9226e-01, PNorm = 55.2123, GNorm = 0.6058, lr_0 = 8.9609e-04
Loss = 2.0358e-01, PNorm = 55.2373, GNorm = 0.8970, lr_0 = 8.9548e-04
Loss = 1.8988e-01, PNorm = 55.2611, GNorm = 2.8829, lr_0 = 8.9486e-04
Loss = 1.9196e-01, PNorm = 55.2888, GNorm = 1.3404, lr_0 = 8.9425e-04
Loss = 2.0077e-01, PNorm = 55.3217, GNorm = 1.3849, lr_0 = 8.9364e-04
Loss = 1.9553e-01, PNorm = 55.3480, GNorm = 0.7855, lr_0 = 8.9302e-04
Loss = 1.8875e-01, PNorm = 55.3718, GNorm = 0.8041, lr_0 = 8.9241e-04
Loss = 2.0371e-01, PNorm = 55.4016, GNorm = 1.0649, lr_0 = 8.9180e-04
Loss = 2.2374e-01, PNorm = 55.4217, GNorm = 0.9205, lr_0 = 8.9119e-04
Loss = 2.1608e-01, PNorm = 55.4513, GNorm = 2.6759, lr_0 = 8.9058e-04
Loss = 2.3201e-01, PNorm = 55.4754, GNorm = 0.7387, lr_0 = 8.8997e-04
Loss = 2.1274e-01, PNorm = 55.5065, GNorm = 2.1372, lr_0 = 8.8936e-04
Loss = 2.4268e-01, PNorm = 55.5386, GNorm = 1.0318, lr_0 = 8.8875e-04
Loss = 2.2738e-01, PNorm = 55.5683, GNorm = 1.9221, lr_0 = 8.8814e-04
Loss = 2.3741e-01, PNorm = 55.6093, GNorm = 0.7746, lr_0 = 8.8753e-04
Loss = 2.3960e-01, PNorm = 55.6417, GNorm = 1.1040, lr_0 = 8.8693e-04
Loss = 2.2706e-01, PNorm = 55.6795, GNorm = 0.9165, lr_0 = 8.8632e-04
Loss = 2.0562e-01, PNorm = 55.7015, GNorm = 1.0474, lr_0 = 8.8571e-04
Loss = 2.1038e-01, PNorm = 55.7236, GNorm = 0.8386, lr_0 = 8.8510e-04
Loss = 2.1096e-01, PNorm = 55.7450, GNorm = 0.6024, lr_0 = 8.8450e-04
Loss = 1.9066e-01, PNorm = 55.7616, GNorm = 0.7703, lr_0 = 8.8389e-04
Loss = 1.9598e-01, PNorm = 55.7833, GNorm = 1.0359, lr_0 = 8.8329e-04
Loss = 2.2883e-01, PNorm = 55.8134, GNorm = 1.0711, lr_0 = 8.8268e-04
Loss = 2.3616e-01, PNorm = 55.8333, GNorm = 0.8115, lr_0 = 8.8208e-04
Loss = 1.9328e-01, PNorm = 55.8523, GNorm = 2.0171, lr_0 = 8.8147e-04
Loss = 2.1808e-01, PNorm = 55.8778, GNorm = 0.9963, lr_0 = 8.8087e-04
Loss = 1.7262e-01, PNorm = 55.8965, GNorm = 1.0295, lr_0 = 8.8026e-04
Loss = 1.9196e-01, PNorm = 55.9195, GNorm = 1.2459, lr_0 = 8.7966e-04
Loss = 2.2753e-01, PNorm = 55.9367, GNorm = 2.1846, lr_0 = 8.7906e-04
Loss = 2.0639e-01, PNorm = 55.9766, GNorm = 1.6616, lr_0 = 8.7846e-04
Loss = 1.7705e-01, PNorm = 56.0026, GNorm = 1.3463, lr_0 = 8.7785e-04
Loss = 2.2036e-01, PNorm = 56.0345, GNorm = 2.4898, lr_0 = 8.7725e-04
Loss = 1.8544e-01, PNorm = 56.0652, GNorm = 0.6458, lr_0 = 8.7665e-04
Loss = 1.7407e-01, PNorm = 56.0931, GNorm = 0.7866, lr_0 = 8.7605e-04
Loss = 2.0291e-01, PNorm = 56.1125, GNorm = 0.6169, lr_0 = 8.7545e-04
Loss = 1.9173e-01, PNorm = 56.1360, GNorm = 0.8113, lr_0 = 8.7485e-04
Loss = 1.9213e-01, PNorm = 56.1623, GNorm = 1.1054, lr_0 = 8.7425e-04
Loss = 1.9989e-01, PNorm = 56.1752, GNorm = 0.9296, lr_0 = 8.7365e-04
Loss = 2.0602e-01, PNorm = 56.2090, GNorm = 1.3260, lr_0 = 8.7306e-04
Loss = 2.0187e-01, PNorm = 56.2335, GNorm = 0.9674, lr_0 = 8.7246e-04
Loss = 2.1343e-01, PNorm = 56.2752, GNorm = 2.7304, lr_0 = 8.7186e-04
Loss = 1.9922e-01, PNorm = 56.3085, GNorm = 0.7748, lr_0 = 8.7126e-04
Loss = 2.0872e-01, PNorm = 56.3424, GNorm = 0.7613, lr_0 = 8.7067e-04
Loss = 2.0316e-01, PNorm = 56.3640, GNorm = 1.9148, lr_0 = 8.7007e-04
Loss = 2.1444e-01, PNorm = 56.3836, GNorm = 2.1392, lr_0 = 8.6947e-04
Loss = 2.2570e-01, PNorm = 56.4137, GNorm = 0.8542, lr_0 = 8.6888e-04
Loss = 2.1386e-01, PNorm = 56.4380, GNorm = 1.9231, lr_0 = 8.6828e-04
Loss = 2.2519e-01, PNorm = 56.4707, GNorm = 2.2432, lr_0 = 8.6769e-04
Loss = 1.7839e-01, PNorm = 56.5005, GNorm = 1.3673, lr_0 = 8.6709e-04
Loss = 1.7634e-01, PNorm = 56.5226, GNorm = 0.5798, lr_0 = 8.6650e-04
Loss = 2.2442e-01, PNorm = 56.5431, GNorm = 0.8919, lr_0 = 8.6590e-04
Loss = 2.0319e-01, PNorm = 56.5769, GNorm = 0.9170, lr_0 = 8.6531e-04
Loss = 1.5647e-01, PNorm = 56.6111, GNorm = 0.6756, lr_0 = 8.6472e-04
Loss = 1.7886e-01, PNorm = 56.6309, GNorm = 0.9592, lr_0 = 8.6413e-04
Loss = 1.9867e-01, PNorm = 56.6645, GNorm = 1.2295, lr_0 = 8.6353e-04
Loss = 1.9581e-01, PNorm = 56.6873, GNorm = 0.6503, lr_0 = 8.6294e-04
Loss = 2.0220e-01, PNorm = 56.7071, GNorm = 1.1996, lr_0 = 8.6235e-04
Loss = 1.9689e-01, PNorm = 56.7295, GNorm = 0.7184, lr_0 = 8.6176e-04
Loss = 1.8047e-01, PNorm = 56.7528, GNorm = 0.7767, lr_0 = 8.6117e-04
Loss = 1.8852e-01, PNorm = 56.7754, GNorm = 1.4419, lr_0 = 8.6058e-04
Loss = 2.0866e-01, PNorm = 56.7991, GNorm = 2.5061, lr_0 = 8.5999e-04
Loss = 2.0203e-01, PNorm = 56.8227, GNorm = 0.9568, lr_0 = 8.5940e-04
Loss = 2.0430e-01, PNorm = 56.8433, GNorm = 1.9606, lr_0 = 8.5881e-04
Loss = 2.0525e-01, PNorm = 56.8680, GNorm = 1.5085, lr_0 = 8.5823e-04
Loss = 1.9957e-01, PNorm = 56.8906, GNorm = 1.5976, lr_0 = 8.5764e-04
Loss = 1.9683e-01, PNorm = 56.9141, GNorm = 1.4753, lr_0 = 8.5705e-04
Loss = 2.1210e-01, PNorm = 56.9430, GNorm = 0.6121, lr_0 = 8.5646e-04
Loss = 2.2061e-01, PNorm = 56.9603, GNorm = 0.6711, lr_0 = 8.5588e-04
Loss = 2.0031e-01, PNorm = 56.9874, GNorm = 1.5708, lr_0 = 8.5529e-04
Loss = 1.7916e-01, PNorm = 57.0135, GNorm = 0.6780, lr_0 = 8.5470e-04
Loss = 1.9086e-01, PNorm = 57.0380, GNorm = 0.8748, lr_0 = 8.5412e-04
Loss = 2.1169e-01, PNorm = 57.0665, GNorm = 1.6296, lr_0 = 8.5353e-04
Loss = 2.0161e-01, PNorm = 57.0901, GNorm = 0.9558, lr_0 = 8.5295e-04
Loss = 2.2227e-01, PNorm = 57.1273, GNorm = 1.6772, lr_0 = 8.5236e-04
Loss = 1.8799e-01, PNorm = 57.1620, GNorm = 0.5986, lr_0 = 8.5178e-04
Loss = 2.0171e-01, PNorm = 57.1859, GNorm = 0.8155, lr_0 = 8.5120e-04
Loss = 1.8133e-01, PNorm = 57.2109, GNorm = 1.7839, lr_0 = 8.5061e-04
Loss = 1.8995e-01, PNorm = 57.2307, GNorm = 0.7861, lr_0 = 8.5003e-04
Loss = 1.8634e-01, PNorm = 57.2514, GNorm = 1.0686, lr_0 = 8.4945e-04
Loss = 1.7864e-01, PNorm = 57.2719, GNorm = 2.0469, lr_0 = 8.4887e-04
Loss = 1.8920e-01, PNorm = 57.2946, GNorm = 1.2879, lr_0 = 8.4828e-04
Validation mae = 0.278243
Epoch 4
Loss = 1.8252e-01, PNorm = 57.3160, GNorm = 1.1378, lr_0 = 8.4770e-04
Loss = 1.7522e-01, PNorm = 57.3360, GNorm = 1.1047, lr_0 = 8.4712e-04
Loss = 1.6920e-01, PNorm = 57.3598, GNorm = 0.8378, lr_0 = 8.4654e-04
Loss = 1.7475e-01, PNorm = 57.3805, GNorm = 1.2312, lr_0 = 8.4596e-04
Loss = 1.7563e-01, PNorm = 57.4126, GNorm = 0.9389, lr_0 = 8.4538e-04
Loss = 1.9719e-01, PNorm = 57.4474, GNorm = 2.3411, lr_0 = 8.4480e-04
Loss = 1.9107e-01, PNorm = 57.4812, GNorm = 2.0196, lr_0 = 8.4423e-04
Loss = 1.7584e-01, PNorm = 57.5183, GNorm = 2.1416, lr_0 = 8.4365e-04
Loss = 2.0617e-01, PNorm = 57.5390, GNorm = 1.4913, lr_0 = 8.4307e-04
Loss = 1.8352e-01, PNorm = 57.5593, GNorm = 0.8805, lr_0 = 8.4249e-04
Loss = 1.8145e-01, PNorm = 57.5788, GNorm = 0.6556, lr_0 = 8.4191e-04
Loss = 1.8312e-01, PNorm = 57.6035, GNorm = 0.6963, lr_0 = 8.4134e-04
Loss = 1.9727e-01, PNorm = 57.6265, GNorm = 1.2738, lr_0 = 8.4076e-04
Loss = 1.8136e-01, PNorm = 57.6565, GNorm = 3.0877, lr_0 = 8.4019e-04
Loss = 2.2128e-01, PNorm = 57.6941, GNorm = 0.7934, lr_0 = 8.3961e-04
Loss = 2.0759e-01, PNorm = 57.7312, GNorm = 0.9233, lr_0 = 8.3903e-04
Loss = 2.2910e-01, PNorm = 57.7678, GNorm = 0.7282, lr_0 = 8.3846e-04
Loss = 2.1685e-01, PNorm = 57.7879, GNorm = 1.3083, lr_0 = 8.3789e-04
Loss = 1.7169e-01, PNorm = 57.8105, GNorm = 0.9111, lr_0 = 8.3731e-04
Loss = 2.0203e-01, PNorm = 57.8289, GNorm = 0.7609, lr_0 = 8.3674e-04
Loss = 1.8118e-01, PNorm = 57.8626, GNorm = 0.9365, lr_0 = 8.3616e-04
Loss = 1.9803e-01, PNorm = 57.8840, GNorm = 0.8119, lr_0 = 8.3559e-04
Loss = 1.9362e-01, PNorm = 57.9119, GNorm = 1.8583, lr_0 = 8.3502e-04
Loss = 1.9417e-01, PNorm = 57.9369, GNorm = 0.6073, lr_0 = 8.3445e-04
Loss = 1.9274e-01, PNorm = 57.9535, GNorm = 1.0413, lr_0 = 8.3388e-04
Loss = 1.6625e-01, PNorm = 57.9816, GNorm = 0.7625, lr_0 = 8.3330e-04
Loss = 1.9689e-01, PNorm = 58.0041, GNorm = 1.0439, lr_0 = 8.3273e-04
Loss = 1.7138e-01, PNorm = 58.0324, GNorm = 0.7495, lr_0 = 8.3216e-04
Loss = 1.8020e-01, PNorm = 58.0518, GNorm = 1.0064, lr_0 = 8.3159e-04
Loss = 1.8334e-01, PNorm = 58.0732, GNorm = 1.5066, lr_0 = 8.3102e-04
Loss = 2.0918e-01, PNorm = 58.0909, GNorm = 0.7907, lr_0 = 8.3045e-04
Loss = 1.9516e-01, PNorm = 58.1136, GNorm = 1.6810, lr_0 = 8.2988e-04
Loss = 1.7141e-01, PNorm = 58.1434, GNorm = 1.1194, lr_0 = 8.2932e-04
Loss = 1.8086e-01, PNorm = 58.1667, GNorm = 1.0920, lr_0 = 8.2875e-04
Loss = 1.6050e-01, PNorm = 58.1875, GNorm = 0.7751, lr_0 = 8.2818e-04
Loss = 1.7236e-01, PNorm = 58.2131, GNorm = 0.6866, lr_0 = 8.2761e-04
Loss = 1.8324e-01, PNorm = 58.2315, GNorm = 1.0749, lr_0 = 8.2705e-04
Loss = 1.9611e-01, PNorm = 58.2567, GNorm = 1.7864, lr_0 = 8.2648e-04
Loss = 2.0035e-01, PNorm = 58.2892, GNorm = 0.7125, lr_0 = 8.2591e-04
Loss = 2.1681e-01, PNorm = 58.3162, GNorm = 0.7994, lr_0 = 8.2535e-04
Loss = 1.8115e-01, PNorm = 58.3397, GNorm = 0.9332, lr_0 = 8.2478e-04
Loss = 1.8317e-01, PNorm = 58.3612, GNorm = 0.8405, lr_0 = 8.2422e-04
Loss = 1.9228e-01, PNorm = 58.3836, GNorm = 1.0438, lr_0 = 8.2365e-04
Loss = 1.7279e-01, PNorm = 58.4096, GNorm = 0.6194, lr_0 = 8.2309e-04
Loss = 1.8653e-01, PNorm = 58.4320, GNorm = 0.6416, lr_0 = 8.2252e-04
Loss = 1.8185e-01, PNorm = 58.4615, GNorm = 0.6912, lr_0 = 8.2196e-04
Loss = 2.0133e-01, PNorm = 58.4852, GNorm = 0.7766, lr_0 = 8.2140e-04
Loss = 1.8889e-01, PNorm = 58.5121, GNorm = 1.3017, lr_0 = 8.2084e-04
Loss = 1.7094e-01, PNorm = 58.5308, GNorm = 0.8575, lr_0 = 8.2027e-04
Loss = 1.7543e-01, PNorm = 58.5604, GNorm = 1.0404, lr_0 = 8.1971e-04
Loss = 1.8698e-01, PNorm = 58.5720, GNorm = 1.2597, lr_0 = 8.1915e-04
Loss = 2.0177e-01, PNorm = 58.6038, GNorm = 0.7607, lr_0 = 8.1859e-04
Loss = 1.7283e-01, PNorm = 58.6248, GNorm = 0.7504, lr_0 = 8.1803e-04
Loss = 1.8432e-01, PNorm = 58.6447, GNorm = 0.6148, lr_0 = 8.1747e-04
Loss = 1.9804e-01, PNorm = 58.6709, GNorm = 1.3770, lr_0 = 8.1691e-04
Loss = 1.7402e-01, PNorm = 58.7008, GNorm = 1.9390, lr_0 = 8.1635e-04
Loss = 1.5978e-01, PNorm = 58.7282, GNorm = 0.6787, lr_0 = 8.1579e-04
Loss = 1.8348e-01, PNorm = 58.7583, GNorm = 0.8001, lr_0 = 8.1523e-04
Loss = 1.7347e-01, PNorm = 58.7766, GNorm = 1.1307, lr_0 = 8.1467e-04
Loss = 1.8623e-01, PNorm = 58.7997, GNorm = 1.2000, lr_0 = 8.1411e-04
Loss = 1.7728e-01, PNorm = 58.8296, GNorm = 1.3252, lr_0 = 8.1355e-04
Loss = 1.8235e-01, PNorm = 58.8512, GNorm = 0.9740, lr_0 = 8.1300e-04
Loss = 1.9297e-01, PNorm = 58.8743, GNorm = 2.8713, lr_0 = 8.1244e-04
Loss = 2.0119e-01, PNorm = 58.9034, GNorm = 0.8208, lr_0 = 8.1188e-04
Loss = 1.6884e-01, PNorm = 58.9349, GNorm = 0.8962, lr_0 = 8.1133e-04
Loss = 1.9371e-01, PNorm = 58.9569, GNorm = 1.1803, lr_0 = 8.1077e-04
Loss = 2.0166e-01, PNorm = 58.9851, GNorm = 2.6074, lr_0 = 8.1022e-04
Loss = 1.8475e-01, PNorm = 59.0091, GNorm = 0.6733, lr_0 = 8.0966e-04
Loss = 1.8662e-01, PNorm = 59.0370, GNorm = 1.4278, lr_0 = 8.0911e-04
Loss = 2.0072e-01, PNorm = 59.0667, GNorm = 0.9120, lr_0 = 8.0855e-04
Loss = 1.7763e-01, PNorm = 59.0884, GNorm = 0.8482, lr_0 = 8.0800e-04
Loss = 1.9312e-01, PNorm = 59.1125, GNorm = 1.0832, lr_0 = 8.0745e-04
Loss = 1.9605e-01, PNorm = 59.1298, GNorm = 0.6052, lr_0 = 8.0689e-04
Loss = 1.8075e-01, PNorm = 59.1553, GNorm = 1.1901, lr_0 = 8.0634e-04
Loss = 2.1829e-01, PNorm = 59.1849, GNorm = 1.5691, lr_0 = 8.0579e-04
Loss = 2.0116e-01, PNorm = 59.2248, GNorm = 2.3087, lr_0 = 8.0523e-04
Loss = 1.9087e-01, PNorm = 59.2463, GNorm = 0.9651, lr_0 = 8.0468e-04
Loss = 1.9374e-01, PNorm = 59.2672, GNorm = 1.3356, lr_0 = 8.0413e-04
Loss = 1.9423e-01, PNorm = 59.2880, GNorm = 1.6758, lr_0 = 8.0358e-04
Loss = 2.3626e-01, PNorm = 59.3126, GNorm = 1.9583, lr_0 = 8.0303e-04
Loss = 2.2079e-01, PNorm = 59.3512, GNorm = 0.6025, lr_0 = 8.0248e-04
Loss = 2.0130e-01, PNorm = 59.3841, GNorm = 0.6159, lr_0 = 8.0193e-04
Loss = 1.7755e-01, PNorm = 59.4161, GNorm = 1.5330, lr_0 = 8.0138e-04
Loss = 1.8852e-01, PNorm = 59.4425, GNorm = 2.4991, lr_0 = 8.0083e-04
Loss = 1.9928e-01, PNorm = 59.4701, GNorm = 1.1031, lr_0 = 8.0028e-04
Loss = 1.8394e-01, PNorm = 59.4992, GNorm = 0.6861, lr_0 = 7.9974e-04
Loss = 2.0310e-01, PNorm = 59.5135, GNorm = 0.5719, lr_0 = 7.9919e-04
Loss = 1.6782e-01, PNorm = 59.5411, GNorm = 1.1613, lr_0 = 7.9864e-04
Loss = 1.8173e-01, PNorm = 59.5596, GNorm = 1.4549, lr_0 = 7.9809e-04
Loss = 1.8631e-01, PNorm = 59.5851, GNorm = 1.2297, lr_0 = 7.9755e-04
Loss = 1.6654e-01, PNorm = 59.6130, GNorm = 0.8100, lr_0 = 7.9700e-04
Loss = 1.7485e-01, PNorm = 59.6372, GNorm = 1.8439, lr_0 = 7.9645e-04
Loss = 1.9246e-01, PNorm = 59.6591, GNorm = 0.7312, lr_0 = 7.9591e-04
Loss = 1.9676e-01, PNorm = 59.6810, GNorm = 0.5694, lr_0 = 7.9536e-04
Loss = 1.9079e-01, PNorm = 59.7071, GNorm = 1.7351, lr_0 = 7.9482e-04
Loss = 2.0651e-01, PNorm = 59.7278, GNorm = 1.9978, lr_0 = 7.9427e-04
Loss = 1.8014e-01, PNorm = 59.7575, GNorm = 1.4589, lr_0 = 7.9373e-04
Loss = 1.9232e-01, PNorm = 59.7878, GNorm = 1.1767, lr_0 = 7.9319e-04
Loss = 1.6792e-01, PNorm = 59.8170, GNorm = 1.4384, lr_0 = 7.9264e-04
Loss = 1.7116e-01, PNorm = 59.8388, GNorm = 1.2344, lr_0 = 7.9210e-04
Loss = 1.9750e-01, PNorm = 59.8641, GNorm = 1.0441, lr_0 = 7.9156e-04
Loss = 1.8745e-01, PNorm = 59.8932, GNorm = 1.7407, lr_0 = 7.9101e-04
Loss = 1.9753e-01, PNorm = 59.9155, GNorm = 1.1982, lr_0 = 7.9047e-04
Loss = 1.6053e-01, PNorm = 59.9416, GNorm = 0.8849, lr_0 = 7.8993e-04
Loss = 1.8721e-01, PNorm = 59.9639, GNorm = 0.8489, lr_0 = 7.8939e-04
Loss = 1.6894e-01, PNorm = 59.9786, GNorm = 0.6558, lr_0 = 7.8885e-04
Loss = 1.7981e-01, PNorm = 59.9994, GNorm = 1.8081, lr_0 = 7.8831e-04
Loss = 1.6591e-01, PNorm = 60.0174, GNorm = 0.8011, lr_0 = 7.8777e-04
Loss = 1.7399e-01, PNorm = 60.0392, GNorm = 1.2520, lr_0 = 7.8723e-04
Loss = 2.0738e-01, PNorm = 60.0668, GNorm = 0.8158, lr_0 = 7.8669e-04
Loss = 1.7961e-01, PNorm = 60.0912, GNorm = 0.7210, lr_0 = 7.8615e-04
Loss = 1.8683e-01, PNorm = 60.1122, GNorm = 0.6291, lr_0 = 7.8561e-04
Loss = 1.7498e-01, PNorm = 60.1322, GNorm = 0.6413, lr_0 = 7.8507e-04
Loss = 1.7417e-01, PNorm = 60.1518, GNorm = 0.8455, lr_0 = 7.8454e-04
Loss = 1.8095e-01, PNorm = 60.1775, GNorm = 1.9117, lr_0 = 7.8400e-04
Loss = 2.3566e-01, PNorm = 60.1931, GNorm = 0.9456, lr_0 = 7.8346e-04
Loss = 1.7925e-01, PNorm = 60.2179, GNorm = 0.8715, lr_0 = 7.8293e-04
Loss = 1.6618e-01, PNorm = 60.2354, GNorm = 0.8487, lr_0 = 7.8239e-04
Loss = 1.7963e-01, PNorm = 60.2573, GNorm = 0.7161, lr_0 = 7.8185e-04
Loss = 1.9224e-01, PNorm = 60.2830, GNorm = 1.1766, lr_0 = 7.8132e-04
Validation mae = 0.257261
Epoch 5
Loss = 1.6856e-01, PNorm = 60.3066, GNorm = 0.8294, lr_0 = 7.8078e-04
Loss = 1.9096e-01, PNorm = 60.3301, GNorm = 1.7010, lr_0 = 7.8025e-04
Loss = 1.9179e-01, PNorm = 60.3583, GNorm = 1.6453, lr_0 = 7.7971e-04
Loss = 1.8773e-01, PNorm = 60.3961, GNorm = 2.5688, lr_0 = 7.7918e-04
Loss = 2.0069e-01, PNorm = 60.4354, GNorm = 0.7081, lr_0 = 7.7864e-04
Loss = 1.6306e-01, PNorm = 60.4683, GNorm = 0.5862, lr_0 = 7.7811e-04
Loss = 1.5347e-01, PNorm = 60.4896, GNorm = 0.8188, lr_0 = 7.7758e-04
Loss = 1.4767e-01, PNorm = 60.5077, GNorm = 0.7853, lr_0 = 7.7705e-04
Loss = 1.5396e-01, PNorm = 60.5273, GNorm = 0.8743, lr_0 = 7.7651e-04
Loss = 1.8415e-01, PNorm = 60.5499, GNorm = 1.3388, lr_0 = 7.7598e-04
Loss = 2.0156e-01, PNorm = 60.5795, GNorm = 0.5609, lr_0 = 7.7545e-04
Loss = 1.9075e-01, PNorm = 60.6000, GNorm = 0.7934, lr_0 = 7.7492e-04
Loss = 1.6599e-01, PNorm = 60.6172, GNorm = 1.0995, lr_0 = 7.7439e-04
Loss = 1.9738e-01, PNorm = 60.6368, GNorm = 0.8249, lr_0 = 7.7386e-04
Loss = 1.6859e-01, PNorm = 60.6594, GNorm = 0.7405, lr_0 = 7.7333e-04
Loss = 1.8458e-01, PNorm = 60.6853, GNorm = 0.7699, lr_0 = 7.7280e-04
Loss = 1.7786e-01, PNorm = 60.7130, GNorm = 1.9612, lr_0 = 7.7227e-04
Loss = 1.8597e-01, PNorm = 60.7392, GNorm = 0.9273, lr_0 = 7.7174e-04
Loss = 1.6881e-01, PNorm = 60.7637, GNorm = 0.6618, lr_0 = 7.7121e-04
Loss = 1.6538e-01, PNorm = 60.7779, GNorm = 1.0811, lr_0 = 7.7068e-04
Loss = 1.6962e-01, PNorm = 60.8026, GNorm = 1.0092, lr_0 = 7.7015e-04
Loss = 1.6542e-01, PNorm = 60.8269, GNorm = 0.6907, lr_0 = 7.6963e-04
Loss = 1.5373e-01, PNorm = 60.8539, GNorm = 0.5443, lr_0 = 7.6910e-04
Loss = 1.7775e-01, PNorm = 60.8757, GNorm = 0.6081, lr_0 = 7.6857e-04
Loss = 1.6667e-01, PNorm = 60.9044, GNorm = 1.7908, lr_0 = 7.6805e-04
Loss = 1.9381e-01, PNorm = 60.9193, GNorm = 0.9987, lr_0 = 7.6752e-04
Loss = 1.6982e-01, PNorm = 60.9409, GNorm = 0.6626, lr_0 = 7.6699e-04
Loss = 1.8421e-01, PNorm = 60.9606, GNorm = 1.1338, lr_0 = 7.6647e-04
Loss = 1.6921e-01, PNorm = 60.9738, GNorm = 0.5991, lr_0 = 7.6594e-04
Loss = 1.7013e-01, PNorm = 60.9943, GNorm = 2.2570, lr_0 = 7.6542e-04
Loss = 1.5730e-01, PNorm = 61.0161, GNorm = 0.5694, lr_0 = 7.6489e-04
Loss = 1.7287e-01, PNorm = 61.0374, GNorm = 0.6526, lr_0 = 7.6437e-04
Loss = 1.5998e-01, PNorm = 61.0577, GNorm = 1.1524, lr_0 = 7.6385e-04
Loss = 1.7269e-01, PNorm = 61.0847, GNorm = 0.8778, lr_0 = 7.6332e-04
Loss = 1.7172e-01, PNorm = 61.1077, GNorm = 0.6735, lr_0 = 7.6280e-04
Loss = 1.7810e-01, PNorm = 61.1295, GNorm = 0.5973, lr_0 = 7.6228e-04
Loss = 1.7792e-01, PNorm = 61.1605, GNorm = 0.8637, lr_0 = 7.6176e-04
Loss = 1.9501e-01, PNorm = 61.1935, GNorm = 0.7308, lr_0 = 7.6123e-04
Loss = 1.7289e-01, PNorm = 61.2320, GNorm = 0.6789, lr_0 = 7.6071e-04
Loss = 1.6702e-01, PNorm = 61.2591, GNorm = 0.9419, lr_0 = 7.6019e-04
Loss = 1.9628e-01, PNorm = 61.2811, GNorm = 0.6537, lr_0 = 7.5967e-04
Loss = 1.8060e-01, PNorm = 61.3107, GNorm = 1.1966, lr_0 = 7.5915e-04
Loss = 1.7392e-01, PNorm = 61.3319, GNorm = 1.1619, lr_0 = 7.5863e-04
Loss = 1.7452e-01, PNorm = 61.3488, GNorm = 1.0383, lr_0 = 7.5811e-04
Loss = 1.8575e-01, PNorm = 61.3750, GNorm = 0.8030, lr_0 = 7.5759e-04
Loss = 2.2336e-01, PNorm = 61.4138, GNorm = 0.8886, lr_0 = 7.5707e-04
Loss = 1.8220e-01, PNorm = 61.4380, GNorm = 0.9952, lr_0 = 7.5655e-04
Loss = 1.6922e-01, PNorm = 61.4693, GNorm = 0.8155, lr_0 = 7.5603e-04
Loss = 1.7434e-01, PNorm = 61.4957, GNorm = 1.8205, lr_0 = 7.5552e-04
Loss = 1.6892e-01, PNorm = 61.5266, GNorm = 0.6172, lr_0 = 7.5500e-04
Loss = 1.5724e-01, PNorm = 61.5534, GNorm = 0.6772, lr_0 = 7.5448e-04
Loss = 1.8517e-01, PNorm = 61.5788, GNorm = 0.5755, lr_0 = 7.5397e-04
Loss = 1.6097e-01, PNorm = 61.5973, GNorm = 0.7405, lr_0 = 7.5345e-04
Loss = 1.9086e-01, PNorm = 61.6213, GNorm = 1.6981, lr_0 = 7.5293e-04
Loss = 1.9286e-01, PNorm = 61.6576, GNorm = 1.7339, lr_0 = 7.5242e-04
Loss = 1.9046e-01, PNorm = 61.6859, GNorm = 1.0216, lr_0 = 7.5190e-04
Loss = 1.9146e-01, PNorm = 61.7147, GNorm = 0.9998, lr_0 = 7.5139e-04
Loss = 1.6616e-01, PNorm = 61.7417, GNorm = 1.0799, lr_0 = 7.5087e-04
Loss = 1.5362e-01, PNorm = 61.7632, GNorm = 0.7856, lr_0 = 7.5036e-04
Loss = 1.6646e-01, PNorm = 61.7865, GNorm = 1.1547, lr_0 = 7.4984e-04
Loss = 1.8075e-01, PNorm = 61.8149, GNorm = 0.6200, lr_0 = 7.4933e-04
Loss = 1.8986e-01, PNorm = 61.8432, GNorm = 1.0948, lr_0 = 7.4882e-04
Loss = 1.6792e-01, PNorm = 61.8647, GNorm = 0.9044, lr_0 = 7.4830e-04
Loss = 1.7750e-01, PNorm = 61.8867, GNorm = 1.3122, lr_0 = 7.4779e-04
Loss = 1.9773e-01, PNorm = 61.9120, GNorm = 0.7746, lr_0 = 7.4728e-04
Loss = 1.6800e-01, PNorm = 61.9386, GNorm = 0.8219, lr_0 = 7.4677e-04
Loss = 1.8767e-01, PNorm = 61.9619, GNorm = 0.7014, lr_0 = 7.4625e-04
Loss = 1.7992e-01, PNorm = 61.9788, GNorm = 1.6221, lr_0 = 7.4574e-04
Loss = 1.5640e-01, PNorm = 62.0121, GNorm = 2.3690, lr_0 = 7.4523e-04
Loss = 1.8398e-01, PNorm = 62.0359, GNorm = 2.1516, lr_0 = 7.4472e-04
Loss = 1.9693e-01, PNorm = 62.0620, GNorm = 0.8786, lr_0 = 7.4421e-04
Loss = 1.8537e-01, PNorm = 62.0915, GNorm = 0.9477, lr_0 = 7.4370e-04
Loss = 1.5724e-01, PNorm = 62.1202, GNorm = 0.9202, lr_0 = 7.4319e-04
Loss = 1.5830e-01, PNorm = 62.1411, GNorm = 0.8279, lr_0 = 7.4268e-04
Loss = 1.8771e-01, PNorm = 62.1626, GNorm = 1.7048, lr_0 = 7.4217e-04
Loss = 1.6895e-01, PNorm = 62.1882, GNorm = 1.4825, lr_0 = 7.4167e-04
Loss = 1.6095e-01, PNorm = 62.2146, GNorm = 1.0879, lr_0 = 7.4116e-04
Loss = 1.8684e-01, PNorm = 62.2444, GNorm = 0.6467, lr_0 = 7.4065e-04
Loss = 1.8899e-01, PNorm = 62.2812, GNorm = 0.7953, lr_0 = 7.4014e-04
Loss = 1.7535e-01, PNorm = 62.3060, GNorm = 0.9304, lr_0 = 7.3964e-04
Loss = 1.8815e-01, PNorm = 62.3250, GNorm = 0.7780, lr_0 = 7.3913e-04
Loss = 2.0075e-01, PNorm = 62.3524, GNorm = 1.9748, lr_0 = 7.3862e-04
Loss = 1.8268e-01, PNorm = 62.3824, GNorm = 0.9557, lr_0 = 7.3812e-04
Loss = 1.5639e-01, PNorm = 62.4063, GNorm = 0.7351, lr_0 = 7.3761e-04
Loss = 1.7968e-01, PNorm = 62.4278, GNorm = 1.3330, lr_0 = 7.3711e-04
Loss = 1.8681e-01, PNorm = 62.4582, GNorm = 1.0278, lr_0 = 7.3660e-04
Loss = 1.7436e-01, PNorm = 62.4834, GNorm = 2.2063, lr_0 = 7.3610e-04
Loss = 2.0231e-01, PNorm = 62.5102, GNorm = 1.3624, lr_0 = 7.3559e-04
Loss = 1.6593e-01, PNorm = 62.5377, GNorm = 0.7447, lr_0 = 7.3509e-04
Loss = 1.8408e-01, PNorm = 62.5559, GNorm = 0.8629, lr_0 = 7.3458e-04
Loss = 1.7997e-01, PNorm = 62.5865, GNorm = 0.8791, lr_0 = 7.3408e-04
Loss = 1.5803e-01, PNorm = 62.6163, GNorm = 2.3198, lr_0 = 7.3358e-04
Loss = 1.7870e-01, PNorm = 62.6438, GNorm = 1.5483, lr_0 = 7.3308e-04
Loss = 1.8908e-01, PNorm = 62.6646, GNorm = 0.7946, lr_0 = 7.3257e-04
Loss = 1.6230e-01, PNorm = 62.6832, GNorm = 0.5846, lr_0 = 7.3207e-04
Loss = 1.7360e-01, PNorm = 62.7006, GNorm = 0.7473, lr_0 = 7.3157e-04
Loss = 1.4510e-01, PNorm = 62.7166, GNorm = 1.1260, lr_0 = 7.3107e-04
Loss = 1.8348e-01, PNorm = 62.7379, GNorm = 0.9362, lr_0 = 7.3057e-04
Loss = 1.9066e-01, PNorm = 62.7578, GNorm = 0.6529, lr_0 = 7.3007e-04
Loss = 1.7479e-01, PNorm = 62.7821, GNorm = 1.1517, lr_0 = 7.2957e-04
Loss = 1.5543e-01, PNorm = 62.8025, GNorm = 1.2124, lr_0 = 7.2907e-04
Loss = 1.9018e-01, PNorm = 62.8239, GNorm = 1.4415, lr_0 = 7.2857e-04
Loss = 1.6116e-01, PNorm = 62.8518, GNorm = 0.7684, lr_0 = 7.2807e-04
Loss = 1.8679e-01, PNorm = 62.8725, GNorm = 0.8169, lr_0 = 7.2757e-04
Loss = 2.0097e-01, PNorm = 62.8932, GNorm = 0.9506, lr_0 = 7.2707e-04
Loss = 1.8662e-01, PNorm = 62.9229, GNorm = 0.9094, lr_0 = 7.2657e-04
Loss = 1.6060e-01, PNorm = 62.9463, GNorm = 1.0659, lr_0 = 7.2608e-04
Loss = 1.9351e-01, PNorm = 62.9773, GNorm = 1.0038, lr_0 = 7.2558e-04
Loss = 1.7593e-01, PNorm = 63.0036, GNorm = 0.6010, lr_0 = 7.2508e-04
Loss = 1.5198e-01, PNorm = 63.0290, GNorm = 1.0178, lr_0 = 7.2458e-04
Loss = 1.8315e-01, PNorm = 63.0544, GNorm = 2.4568, lr_0 = 7.2409e-04
Loss = 1.7800e-01, PNorm = 63.0786, GNorm = 0.9113, lr_0 = 7.2359e-04
Loss = 1.4663e-01, PNorm = 63.0959, GNorm = 0.4979, lr_0 = 7.2310e-04
Loss = 1.9264e-01, PNorm = 63.1138, GNorm = 1.1274, lr_0 = 7.2260e-04
Loss = 1.5273e-01, PNorm = 63.1383, GNorm = 0.6372, lr_0 = 7.2211e-04
Loss = 1.6067e-01, PNorm = 63.1559, GNorm = 0.8498, lr_0 = 7.2161e-04
Loss = 2.0403e-01, PNorm = 63.1743, GNorm = 0.7007, lr_0 = 7.2112e-04
Loss = 1.9272e-01, PNorm = 63.2032, GNorm = 0.9857, lr_0 = 7.2062e-04
Loss = 1.7214e-01, PNorm = 63.2260, GNorm = 0.6340, lr_0 = 7.2013e-04
Loss = 1.7926e-01, PNorm = 63.2495, GNorm = 1.4591, lr_0 = 7.1964e-04
Validation mae = 0.252948
Epoch 6
Loss = 1.7046e-01, PNorm = 63.2665, GNorm = 0.6558, lr_0 = 7.1914e-04
Loss = 1.4345e-01, PNorm = 63.2928, GNorm = 0.6142, lr_0 = 7.1865e-04
Loss = 1.7572e-01, PNorm = 63.3126, GNorm = 1.1617, lr_0 = 7.1816e-04
Loss = 1.5927e-01, PNorm = 63.3359, GNorm = 0.5778, lr_0 = 7.1767e-04
Loss = 1.6335e-01, PNorm = 63.3570, GNorm = 0.6711, lr_0 = 7.1717e-04
Loss = 1.7245e-01, PNorm = 63.3800, GNorm = 0.8504, lr_0 = 7.1668e-04
Loss = 1.6562e-01, PNorm = 63.4023, GNorm = 0.8568, lr_0 = 7.1619e-04
Loss = 1.4987e-01, PNorm = 63.4236, GNorm = 0.6669, lr_0 = 7.1570e-04
Loss = 1.6234e-01, PNorm = 63.4434, GNorm = 0.5668, lr_0 = 7.1521e-04
Loss = 1.7716e-01, PNorm = 63.4746, GNorm = 1.2244, lr_0 = 7.1472e-04
Loss = 1.7558e-01, PNorm = 63.5033, GNorm = 0.6980, lr_0 = 7.1423e-04
Loss = 1.8842e-01, PNorm = 63.5268, GNorm = 1.7628, lr_0 = 7.1374e-04
Loss = 1.3848e-01, PNorm = 63.5459, GNorm = 0.8101, lr_0 = 7.1325e-04
Loss = 1.6093e-01, PNorm = 63.5663, GNorm = 0.6362, lr_0 = 7.1277e-04
Loss = 1.5339e-01, PNorm = 63.5931, GNorm = 0.6376, lr_0 = 7.1228e-04
Loss = 1.6909e-01, PNorm = 63.6150, GNorm = 0.8260, lr_0 = 7.1179e-04
Loss = 1.9161e-01, PNorm = 63.6320, GNorm = 1.5739, lr_0 = 7.1130e-04
Loss = 2.0894e-01, PNorm = 63.6619, GNorm = 1.1279, lr_0 = 7.1081e-04
Loss = 1.8829e-01, PNorm = 63.6948, GNorm = 0.5309, lr_0 = 7.1033e-04
Loss = 1.6423e-01, PNorm = 63.7245, GNorm = 0.6380, lr_0 = 7.0984e-04
Loss = 1.6650e-01, PNorm = 63.7518, GNorm = 0.7645, lr_0 = 7.0935e-04
Loss = 1.4587e-01, PNorm = 63.7783, GNorm = 0.5225, lr_0 = 7.0887e-04
Loss = 1.5309e-01, PNorm = 63.7950, GNorm = 0.5674, lr_0 = 7.0838e-04
Loss = 1.6043e-01, PNorm = 63.8085, GNorm = 1.0425, lr_0 = 7.0790e-04
Loss = 1.5138e-01, PNorm = 63.8336, GNorm = 0.5864, lr_0 = 7.0741e-04
Loss = 1.5276e-01, PNorm = 63.8528, GNorm = 0.8805, lr_0 = 7.0693e-04
Loss = 1.5026e-01, PNorm = 63.8803, GNorm = 0.7740, lr_0 = 7.0644e-04
Loss = 1.7038e-01, PNorm = 63.9011, GNorm = 0.8832, lr_0 = 7.0596e-04
Loss = 1.8122e-01, PNorm = 63.9191, GNorm = 1.6181, lr_0 = 7.0548e-04
Loss = 1.5795e-01, PNorm = 63.9362, GNorm = 0.7679, lr_0 = 7.0499e-04
Loss = 1.6757e-01, PNorm = 63.9602, GNorm = 0.7939, lr_0 = 7.0451e-04
Loss = 1.7171e-01, PNorm = 63.9788, GNorm = 0.8464, lr_0 = 7.0403e-04
Loss = 1.5745e-01, PNorm = 64.0048, GNorm = 1.0982, lr_0 = 7.0354e-04
Loss = 1.7069e-01, PNorm = 64.0274, GNorm = 1.1086, lr_0 = 7.0306e-04
Loss = 1.6839e-01, PNorm = 64.0584, GNorm = 2.4647, lr_0 = 7.0258e-04
Loss = 1.6926e-01, PNorm = 64.0846, GNorm = 0.7248, lr_0 = 7.0210e-04
Loss = 1.5187e-01, PNorm = 64.1082, GNorm = 1.2672, lr_0 = 7.0162e-04
Loss = 1.8035e-01, PNorm = 64.1298, GNorm = 1.4300, lr_0 = 7.0114e-04
Loss = 1.7074e-01, PNorm = 64.1469, GNorm = 0.6384, lr_0 = 7.0066e-04
Loss = 1.5154e-01, PNorm = 64.1666, GNorm = 1.0309, lr_0 = 7.0018e-04
Loss = 1.5952e-01, PNorm = 64.1844, GNorm = 0.7343, lr_0 = 6.9970e-04
Loss = 1.6357e-01, PNorm = 64.2055, GNorm = 1.0004, lr_0 = 6.9922e-04
Loss = 1.7180e-01, PNorm = 64.2237, GNorm = 1.2188, lr_0 = 6.9874e-04
Loss = 1.9079e-01, PNorm = 64.2498, GNorm = 0.7042, lr_0 = 6.9826e-04
Loss = 1.6202e-01, PNorm = 64.2683, GNorm = 1.0239, lr_0 = 6.9778e-04
Loss = 1.6060e-01, PNorm = 64.2884, GNorm = 1.7492, lr_0 = 6.9730e-04
Loss = 1.8415e-01, PNorm = 64.3128, GNorm = 1.5537, lr_0 = 6.9683e-04
Loss = 1.5098e-01, PNorm = 64.3386, GNorm = 1.1278, lr_0 = 6.9635e-04
Loss = 1.8087e-01, PNorm = 64.3632, GNorm = 0.6480, lr_0 = 6.9587e-04
Loss = 1.7499e-01, PNorm = 64.3923, GNorm = 0.8204, lr_0 = 6.9540e-04
Loss = 1.4956e-01, PNorm = 64.4094, GNorm = 0.9060, lr_0 = 6.9492e-04
Loss = 1.7980e-01, PNorm = 64.4285, GNorm = 1.0053, lr_0 = 6.9444e-04
Loss = 1.5344e-01, PNorm = 64.4476, GNorm = 0.6954, lr_0 = 6.9397e-04
Loss = 1.6428e-01, PNorm = 64.4723, GNorm = 1.4533, lr_0 = 6.9349e-04
Loss = 1.3812e-01, PNorm = 64.4943, GNorm = 0.5878, lr_0 = 6.9302e-04
Loss = 1.6288e-01, PNorm = 64.5117, GNorm = 0.9026, lr_0 = 6.9254e-04
Loss = 1.6533e-01, PNorm = 64.5254, GNorm = 1.0301, lr_0 = 6.9207e-04
Loss = 1.6181e-01, PNorm = 64.5477, GNorm = 0.6615, lr_0 = 6.9159e-04
Loss = 1.6444e-01, PNorm = 64.5774, GNorm = 0.8508, lr_0 = 6.9112e-04
Loss = 1.6154e-01, PNorm = 64.6053, GNorm = 0.7699, lr_0 = 6.9065e-04
Loss = 1.5069e-01, PNorm = 64.6191, GNorm = 0.6804, lr_0 = 6.9017e-04
Loss = 1.6155e-01, PNorm = 64.6418, GNorm = 0.5947, lr_0 = 6.8970e-04
Loss = 1.7852e-01, PNorm = 64.6640, GNorm = 1.1003, lr_0 = 6.8923e-04
Loss = 1.6981e-01, PNorm = 64.6871, GNorm = 0.8459, lr_0 = 6.8876e-04
Loss = 1.6024e-01, PNorm = 64.7064, GNorm = 0.6223, lr_0 = 6.8828e-04
Loss = 1.6091e-01, PNorm = 64.7189, GNorm = 1.4036, lr_0 = 6.8781e-04
Loss = 1.5097e-01, PNorm = 64.7354, GNorm = 1.3644, lr_0 = 6.8734e-04
Loss = 1.6243e-01, PNorm = 64.7535, GNorm = 0.9849, lr_0 = 6.8687e-04
Loss = 1.4296e-01, PNorm = 64.7801, GNorm = 0.7702, lr_0 = 6.8640e-04
Loss = 1.4524e-01, PNorm = 64.8009, GNorm = 0.8513, lr_0 = 6.8593e-04
Loss = 1.5317e-01, PNorm = 64.8161, GNorm = 0.6205, lr_0 = 6.8546e-04
Loss = 1.5454e-01, PNorm = 64.8392, GNorm = 1.4497, lr_0 = 6.8499e-04
Loss = 1.7532e-01, PNorm = 64.8594, GNorm = 0.5532, lr_0 = 6.8452e-04
Loss = 1.7954e-01, PNorm = 64.8925, GNorm = 1.1353, lr_0 = 6.8405e-04
Loss = 1.6637e-01, PNorm = 64.9233, GNorm = 0.7941, lr_0 = 6.8358e-04
Loss = 1.6732e-01, PNorm = 64.9460, GNorm = 0.7808, lr_0 = 6.8312e-04
Loss = 1.4194e-01, PNorm = 64.9628, GNorm = 0.5757, lr_0 = 6.8265e-04
Loss = 1.6570e-01, PNorm = 64.9822, GNorm = 0.6333, lr_0 = 6.8218e-04
Loss = 1.8854e-01, PNorm = 65.0003, GNorm = 1.3009, lr_0 = 6.8171e-04
Loss = 1.7780e-01, PNorm = 65.0281, GNorm = 1.2338, lr_0 = 6.8125e-04
Loss = 1.6085e-01, PNorm = 65.0530, GNorm = 0.8577, lr_0 = 6.8078e-04
Loss = 1.6425e-01, PNorm = 65.0727, GNorm = 0.6393, lr_0 = 6.8031e-04
Loss = 1.3798e-01, PNorm = 65.0923, GNorm = 1.1390, lr_0 = 6.7985e-04
Loss = 1.7333e-01, PNorm = 65.1164, GNorm = 1.2223, lr_0 = 6.7938e-04
Loss = 1.6549e-01, PNorm = 65.1408, GNorm = 1.6656, lr_0 = 6.7892e-04
Loss = 2.0697e-01, PNorm = 65.1661, GNorm = 1.7415, lr_0 = 6.7845e-04
Loss = 1.9013e-01, PNorm = 65.2011, GNorm = 0.7503, lr_0 = 6.7799e-04
Loss = 1.5253e-01, PNorm = 65.2288, GNorm = 0.8917, lr_0 = 6.7752e-04
Loss = 1.8516e-01, PNorm = 65.2470, GNorm = 1.1717, lr_0 = 6.7706e-04
Loss = 1.5498e-01, PNorm = 65.2760, GNorm = 0.9671, lr_0 = 6.7659e-04
Loss = 1.5441e-01, PNorm = 65.2967, GNorm = 0.8320, lr_0 = 6.7613e-04
Loss = 1.5696e-01, PNorm = 65.3204, GNorm = 0.9018, lr_0 = 6.7567e-04
Loss = 1.7321e-01, PNorm = 65.3451, GNorm = 0.8253, lr_0 = 6.7520e-04
Loss = 1.6442e-01, PNorm = 65.3623, GNorm = 1.0743, lr_0 = 6.7474e-04
Loss = 1.7668e-01, PNorm = 65.3846, GNorm = 1.1615, lr_0 = 6.7428e-04
Loss = 1.8929e-01, PNorm = 65.4047, GNorm = 0.7790, lr_0 = 6.7382e-04
Loss = 1.9101e-01, PNorm = 65.4328, GNorm = 0.5030, lr_0 = 6.7335e-04
Loss = 1.5109e-01, PNorm = 65.4563, GNorm = 0.7938, lr_0 = 6.7289e-04
Loss = 1.6465e-01, PNorm = 65.4762, GNorm = 1.2968, lr_0 = 6.7243e-04
Loss = 1.8514e-01, PNorm = 65.4978, GNorm = 1.5510, lr_0 = 6.7197e-04
Loss = 1.8911e-01, PNorm = 65.5230, GNorm = 0.7919, lr_0 = 6.7151e-04
Loss = 1.7010e-01, PNorm = 65.5462, GNorm = 0.5670, lr_0 = 6.7105e-04
Loss = 1.6878e-01, PNorm = 65.5678, GNorm = 1.7215, lr_0 = 6.7059e-04
Loss = 1.5318e-01, PNorm = 65.5837, GNorm = 0.7864, lr_0 = 6.7013e-04
Loss = 1.7828e-01, PNorm = 65.5998, GNorm = 1.1686, lr_0 = 6.6967e-04
Loss = 1.6384e-01, PNorm = 65.6186, GNorm = 0.9785, lr_0 = 6.6921e-04
Loss = 1.6543e-01, PNorm = 65.6386, GNorm = 0.9119, lr_0 = 6.6876e-04
Loss = 1.6779e-01, PNorm = 65.6532, GNorm = 1.1751, lr_0 = 6.6830e-04
Loss = 1.5409e-01, PNorm = 65.6731, GNorm = 0.8018, lr_0 = 6.6784e-04
Loss = 1.6791e-01, PNorm = 65.6978, GNorm = 0.4978, lr_0 = 6.6738e-04
Loss = 1.6646e-01, PNorm = 65.7162, GNorm = 0.8619, lr_0 = 6.6693e-04
Loss = 1.7924e-01, PNorm = 65.7354, GNorm = 2.3780, lr_0 = 6.6647e-04
Loss = 1.9343e-01, PNorm = 65.7582, GNorm = 1.3033, lr_0 = 6.6601e-04
Loss = 1.8209e-01, PNorm = 65.7891, GNorm = 1.1800, lr_0 = 6.6556e-04
Loss = 1.5954e-01, PNorm = 65.8118, GNorm = 0.6839, lr_0 = 6.6510e-04
Loss = 1.8712e-01, PNorm = 65.8338, GNorm = 0.8103, lr_0 = 6.6464e-04
Loss = 1.6236e-01, PNorm = 65.8514, GNorm = 0.8096, lr_0 = 6.6419e-04
Loss = 1.5193e-01, PNorm = 65.8732, GNorm = 0.8165, lr_0 = 6.6373e-04
Loss = 1.4649e-01, PNorm = 65.8929, GNorm = 1.3573, lr_0 = 6.6328e-04
Loss = 1.7680e-01, PNorm = 65.9102, GNorm = 1.1941, lr_0 = 6.6282e-04
Validation mae = 0.252486
Epoch 7
Loss = 1.6768e-01, PNorm = 65.9315, GNorm = 1.2260, lr_0 = 6.6237e-04
Loss = 1.5703e-01, PNorm = 65.9484, GNorm = 0.7796, lr_0 = 6.6192e-04
Loss = 1.4298e-01, PNorm = 65.9637, GNorm = 0.8811, lr_0 = 6.6146e-04
Loss = 1.6057e-01, PNorm = 65.9717, GNorm = 0.7015, lr_0 = 6.6101e-04
Loss = 1.4408e-01, PNorm = 65.9926, GNorm = 0.4970, lr_0 = 6.6056e-04
Loss = 1.6833e-01, PNorm = 66.0057, GNorm = 0.8763, lr_0 = 6.6011e-04
Loss = 1.7289e-01, PNorm = 66.0263, GNorm = 0.8592, lr_0 = 6.5965e-04
Loss = 1.6788e-01, PNorm = 66.0447, GNorm = 1.4555, lr_0 = 6.5920e-04
Loss = 1.6566e-01, PNorm = 66.0690, GNorm = 0.5925, lr_0 = 6.5875e-04
Loss = 1.5769e-01, PNorm = 66.0907, GNorm = 0.7380, lr_0 = 6.5830e-04
Loss = 1.6606e-01, PNorm = 66.1197, GNorm = 0.8393, lr_0 = 6.5785e-04
Loss = 1.7030e-01, PNorm = 66.1473, GNorm = 0.6490, lr_0 = 6.5740e-04
Loss = 1.5899e-01, PNorm = 66.1742, GNorm = 1.1888, lr_0 = 6.5695e-04
Loss = 1.7463e-01, PNorm = 66.1959, GNorm = 1.9214, lr_0 = 6.5650e-04
Loss = 1.6481e-01, PNorm = 66.2175, GNorm = 0.6871, lr_0 = 6.5605e-04
Loss = 1.4930e-01, PNorm = 66.2455, GNorm = 0.7224, lr_0 = 6.5560e-04
Loss = 1.2598e-01, PNorm = 66.2645, GNorm = 0.4948, lr_0 = 6.5515e-04
Loss = 1.4885e-01, PNorm = 66.2815, GNorm = 0.7431, lr_0 = 6.5470e-04
Loss = 1.4805e-01, PNorm = 66.3015, GNorm = 0.7579, lr_0 = 6.5425e-04
Loss = 1.7489e-01, PNorm = 66.3296, GNorm = 1.0035, lr_0 = 6.5380e-04
Loss = 1.3798e-01, PNorm = 66.3565, GNorm = 0.5784, lr_0 = 6.5335e-04
Loss = 1.5557e-01, PNorm = 66.3719, GNorm = 0.7034, lr_0 = 6.5291e-04
Loss = 1.5949e-01, PNorm = 66.3956, GNorm = 0.9603, lr_0 = 6.5246e-04
Loss = 1.4507e-01, PNorm = 66.4135, GNorm = 0.5659, lr_0 = 6.5201e-04
Loss = 1.3865e-01, PNorm = 66.4365, GNorm = 0.5668, lr_0 = 6.5157e-04
Loss = 1.5461e-01, PNorm = 66.4561, GNorm = 0.8288, lr_0 = 6.5112e-04
Loss = 1.4816e-01, PNorm = 66.4724, GNorm = 0.5633, lr_0 = 6.5067e-04
Loss = 1.5889e-01, PNorm = 66.4870, GNorm = 0.4768, lr_0 = 6.5023e-04
Loss = 1.5703e-01, PNorm = 66.5055, GNorm = 0.9982, lr_0 = 6.4978e-04
Loss = 1.6505e-01, PNorm = 66.5255, GNorm = 0.6513, lr_0 = 6.4934e-04
Loss = 1.4294e-01, PNorm = 66.5497, GNorm = 1.1845, lr_0 = 6.4889e-04
Loss = 1.6236e-01, PNorm = 66.5663, GNorm = 0.7413, lr_0 = 6.4845e-04
Loss = 1.4915e-01, PNorm = 66.5882, GNorm = 0.7701, lr_0 = 6.4800e-04
Loss = 1.6690e-01, PNorm = 66.6112, GNorm = 1.0019, lr_0 = 6.4756e-04
Loss = 1.5960e-01, PNorm = 66.6388, GNorm = 1.8286, lr_0 = 6.4712e-04
Loss = 1.4560e-01, PNorm = 66.6577, GNorm = 0.6630, lr_0 = 6.4667e-04
Loss = 1.5127e-01, PNorm = 66.6834, GNorm = 1.3304, lr_0 = 6.4623e-04
Loss = 1.4071e-01, PNorm = 66.7091, GNorm = 0.5755, lr_0 = 6.4579e-04
Loss = 1.5503e-01, PNorm = 66.7299, GNorm = 0.9353, lr_0 = 6.4534e-04
Loss = 1.4877e-01, PNorm = 66.7519, GNorm = 1.0411, lr_0 = 6.4490e-04
Loss = 1.4647e-01, PNorm = 66.7716, GNorm = 1.3776, lr_0 = 6.4446e-04
Loss = 1.5427e-01, PNorm = 66.7921, GNorm = 0.7983, lr_0 = 6.4402e-04
Loss = 1.5994e-01, PNorm = 66.8101, GNorm = 0.5614, lr_0 = 6.4358e-04
Loss = 1.7477e-01, PNorm = 66.8363, GNorm = 0.9622, lr_0 = 6.4314e-04
Loss = 1.6007e-01, PNorm = 66.8518, GNorm = 0.9794, lr_0 = 6.4270e-04
Loss = 1.5344e-01, PNorm = 66.8796, GNorm = 0.9024, lr_0 = 6.4226e-04
Loss = 1.5856e-01, PNorm = 66.9057, GNorm = 1.1595, lr_0 = 6.4182e-04
Loss = 1.7548e-01, PNorm = 66.9260, GNorm = 0.6156, lr_0 = 6.4138e-04
Loss = 1.6799e-01, PNorm = 66.9418, GNorm = 0.9796, lr_0 = 6.4094e-04
Loss = 1.5577e-01, PNorm = 66.9617, GNorm = 0.7945, lr_0 = 6.4050e-04
Loss = 1.6739e-01, PNorm = 66.9864, GNorm = 0.8339, lr_0 = 6.4006e-04
Loss = 1.5848e-01, PNorm = 67.0177, GNorm = 1.6042, lr_0 = 6.3962e-04
Loss = 1.4406e-01, PNorm = 67.0404, GNorm = 0.8967, lr_0 = 6.3918e-04
Loss = 1.6884e-01, PNorm = 67.0666, GNorm = 0.7429, lr_0 = 6.3874e-04
Loss = 1.4984e-01, PNorm = 67.0881, GNorm = 0.9039, lr_0 = 6.3831e-04
Loss = 1.4844e-01, PNorm = 67.1044, GNorm = 0.5820, lr_0 = 6.3787e-04
Loss = 1.3700e-01, PNorm = 67.1284, GNorm = 0.8037, lr_0 = 6.3743e-04
Loss = 1.4508e-01, PNorm = 67.1495, GNorm = 0.7447, lr_0 = 6.3700e-04
Loss = 1.3675e-01, PNorm = 67.1692, GNorm = 1.2816, lr_0 = 6.3656e-04
Loss = 1.6209e-01, PNorm = 67.1814, GNorm = 1.5176, lr_0 = 6.3612e-04
Loss = 1.5589e-01, PNorm = 67.2043, GNorm = 0.9829, lr_0 = 6.3569e-04
Loss = 1.4950e-01, PNorm = 67.2257, GNorm = 0.8840, lr_0 = 6.3525e-04
Loss = 1.5476e-01, PNorm = 67.2508, GNorm = 1.7240, lr_0 = 6.3482e-04
Loss = 1.4452e-01, PNorm = 67.2702, GNorm = 0.5877, lr_0 = 6.3438e-04
Loss = 1.5776e-01, PNorm = 67.2900, GNorm = 0.7239, lr_0 = 6.3395e-04
Loss = 1.4733e-01, PNorm = 67.3161, GNorm = 0.6931, lr_0 = 6.3351e-04
Loss = 1.6127e-01, PNorm = 67.3288, GNorm = 0.6529, lr_0 = 6.3308e-04
Loss = 1.5263e-01, PNorm = 67.3402, GNorm = 0.6930, lr_0 = 6.3265e-04
Loss = 1.5011e-01, PNorm = 67.3593, GNorm = 1.8532, lr_0 = 6.3221e-04
Loss = 1.5501e-01, PNorm = 67.3816, GNorm = 1.1978, lr_0 = 6.3178e-04
Loss = 1.5278e-01, PNorm = 67.4010, GNorm = 0.5290, lr_0 = 6.3135e-04
Loss = 1.6010e-01, PNorm = 67.4274, GNorm = 0.6660, lr_0 = 6.3091e-04
Loss = 1.4772e-01, PNorm = 67.4509, GNorm = 1.3371, lr_0 = 6.3048e-04
Loss = 1.4837e-01, PNorm = 67.4674, GNorm = 0.8349, lr_0 = 6.3005e-04
Loss = 1.5053e-01, PNorm = 67.4891, GNorm = 0.8390, lr_0 = 6.2962e-04
Loss = 1.3878e-01, PNorm = 67.5071, GNorm = 1.1792, lr_0 = 6.2919e-04
Loss = 1.6076e-01, PNorm = 67.5292, GNorm = 0.8282, lr_0 = 6.2876e-04
Loss = 1.5678e-01, PNorm = 67.5485, GNorm = 0.7015, lr_0 = 6.2833e-04
Loss = 1.6571e-01, PNorm = 67.5702, GNorm = 1.0346, lr_0 = 6.2789e-04
Loss = 1.4681e-01, PNorm = 67.5943, GNorm = 0.7776, lr_0 = 6.2746e-04
Loss = 1.4463e-01, PNorm = 67.6118, GNorm = 0.7749, lr_0 = 6.2703e-04
Loss = 1.6206e-01, PNorm = 67.6247, GNorm = 1.1544, lr_0 = 6.2661e-04
Loss = 1.8081e-01, PNorm = 67.6484, GNorm = 0.9916, lr_0 = 6.2618e-04
Loss = 1.6961e-01, PNorm = 67.6732, GNorm = 0.8485, lr_0 = 6.2575e-04
Loss = 1.6134e-01, PNorm = 67.6969, GNorm = 0.8008, lr_0 = 6.2532e-04
Loss = 1.6159e-01, PNorm = 67.7194, GNorm = 1.0670, lr_0 = 6.2489e-04
Loss = 1.6318e-01, PNorm = 67.7444, GNorm = 0.7840, lr_0 = 6.2446e-04
Loss = 1.3562e-01, PNorm = 67.7693, GNorm = 0.6457, lr_0 = 6.2403e-04
Loss = 1.6562e-01, PNorm = 67.7837, GNorm = 0.6347, lr_0 = 6.2361e-04
Loss = 1.6178e-01, PNorm = 67.7974, GNorm = 0.9619, lr_0 = 6.2318e-04
Loss = 1.4802e-01, PNorm = 67.8191, GNorm = 0.9668, lr_0 = 6.2275e-04
Loss = 1.5814e-01, PNorm = 67.8353, GNorm = 0.6246, lr_0 = 6.2233e-04
Loss = 1.7021e-01, PNorm = 67.8552, GNorm = 0.9847, lr_0 = 6.2190e-04
Loss = 1.5995e-01, PNorm = 67.8729, GNorm = 1.3848, lr_0 = 6.2147e-04
Loss = 1.4429e-01, PNorm = 67.8899, GNorm = 0.6561, lr_0 = 6.2105e-04
Loss = 1.6291e-01, PNorm = 67.9125, GNorm = 0.7232, lr_0 = 6.2062e-04
Loss = 1.5192e-01, PNorm = 67.9363, GNorm = 1.0838, lr_0 = 6.2020e-04
Loss = 1.6638e-01, PNorm = 67.9536, GNorm = 0.8212, lr_0 = 6.1977e-04
Loss = 1.6643e-01, PNorm = 67.9795, GNorm = 0.7032, lr_0 = 6.1935e-04
Loss = 1.5955e-01, PNorm = 68.0052, GNorm = 1.0077, lr_0 = 6.1892e-04
Loss = 1.6698e-01, PNorm = 68.0349, GNorm = 0.7380, lr_0 = 6.1850e-04
Loss = 1.6523e-01, PNorm = 68.0546, GNorm = 0.6033, lr_0 = 6.1808e-04
Loss = 1.3978e-01, PNorm = 68.0725, GNorm = 1.0154, lr_0 = 6.1765e-04
Loss = 1.7554e-01, PNorm = 68.0898, GNorm = 0.7133, lr_0 = 6.1723e-04
Loss = 1.5143e-01, PNorm = 68.1126, GNorm = 0.6613, lr_0 = 6.1681e-04
Loss = 1.5840e-01, PNorm = 68.1325, GNorm = 0.7952, lr_0 = 6.1638e-04
Loss = 1.6494e-01, PNorm = 68.1550, GNorm = 0.6524, lr_0 = 6.1596e-04
Loss = 1.4584e-01, PNorm = 68.1751, GNorm = 0.9384, lr_0 = 6.1554e-04
Loss = 1.5530e-01, PNorm = 68.1906, GNorm = 1.0147, lr_0 = 6.1512e-04
Loss = 1.6889e-01, PNorm = 68.2025, GNorm = 0.8198, lr_0 = 6.1470e-04
Loss = 1.8649e-01, PNorm = 68.2171, GNorm = 1.2381, lr_0 = 6.1428e-04
Loss = 1.6172e-01, PNorm = 68.2432, GNorm = 1.3476, lr_0 = 6.1385e-04
Loss = 1.5041e-01, PNorm = 68.2632, GNorm = 0.5091, lr_0 = 6.1343e-04
Loss = 1.3745e-01, PNorm = 68.2782, GNorm = 0.9304, lr_0 = 6.1301e-04
Loss = 1.6315e-01, PNorm = 68.2887, GNorm = 0.7269, lr_0 = 6.1259e-04
Loss = 1.3557e-01, PNorm = 68.3059, GNorm = 1.0401, lr_0 = 6.1217e-04
Loss = 1.6617e-01, PNorm = 68.3234, GNorm = 1.0696, lr_0 = 6.1175e-04
Loss = 1.5424e-01, PNorm = 68.3473, GNorm = 1.2218, lr_0 = 6.1134e-04
Loss = 1.3667e-01, PNorm = 68.3696, GNorm = 0.7654, lr_0 = 6.1092e-04
Loss = 1.5840e-01, PNorm = 68.3839, GNorm = 0.5337, lr_0 = 6.1050e-04
Validation mae = 0.249656
Epoch 8
Loss = 1.4073e-01, PNorm = 68.4008, GNorm = 0.4994, lr_0 = 6.1008e-04
Loss = 1.6401e-01, PNorm = 68.4231, GNorm = 0.9976, lr_0 = 6.0966e-04
Loss = 1.4366e-01, PNorm = 68.4421, GNorm = 1.0766, lr_0 = 6.0924e-04
Loss = 1.5337e-01, PNorm = 68.4683, GNorm = 0.5786, lr_0 = 6.0883e-04
Loss = 1.5390e-01, PNorm = 68.4870, GNorm = 0.6612, lr_0 = 6.0841e-04
Loss = 1.5488e-01, PNorm = 68.5087, GNorm = 0.7381, lr_0 = 6.0799e-04
Loss = 1.3425e-01, PNorm = 68.5303, GNorm = 1.0359, lr_0 = 6.0758e-04
Loss = 1.5666e-01, PNorm = 68.5509, GNorm = 1.9449, lr_0 = 6.0716e-04
Loss = 1.5129e-01, PNorm = 68.5696, GNorm = 1.2719, lr_0 = 6.0674e-04
Loss = 1.5150e-01, PNorm = 68.5919, GNorm = 0.6964, lr_0 = 6.0633e-04
Loss = 1.3734e-01, PNorm = 68.6068, GNorm = 0.8922, lr_0 = 6.0591e-04
Loss = 1.3936e-01, PNorm = 68.6310, GNorm = 0.6012, lr_0 = 6.0550e-04
Loss = 1.5906e-01, PNorm = 68.6510, GNorm = 0.9919, lr_0 = 6.0508e-04
Loss = 1.2866e-01, PNorm = 68.6720, GNorm = 0.8170, lr_0 = 6.0467e-04
Loss = 1.5506e-01, PNorm = 68.6916, GNorm = 0.9159, lr_0 = 6.0425e-04
Loss = 1.6097e-01, PNorm = 68.7118, GNorm = 0.6622, lr_0 = 6.0384e-04
Loss = 1.5384e-01, PNorm = 68.7336, GNorm = 0.7077, lr_0 = 6.0343e-04
Loss = 1.4434e-01, PNorm = 68.7530, GNorm = 0.5307, lr_0 = 6.0301e-04
Loss = 1.6701e-01, PNorm = 68.7738, GNorm = 0.5860, lr_0 = 6.0260e-04
Loss = 1.6770e-01, PNorm = 68.8041, GNorm = 0.8361, lr_0 = 6.0219e-04
Loss = 1.3920e-01, PNorm = 68.8245, GNorm = 0.7983, lr_0 = 6.0178e-04
Loss = 1.7180e-01, PNorm = 68.8402, GNorm = 0.8006, lr_0 = 6.0136e-04
Loss = 1.4158e-01, PNorm = 68.8563, GNorm = 0.6151, lr_0 = 6.0095e-04
Loss = 1.3717e-01, PNorm = 68.8670, GNorm = 0.6334, lr_0 = 6.0054e-04
Loss = 1.6010e-01, PNorm = 68.8833, GNorm = 1.1669, lr_0 = 6.0013e-04
Loss = 1.4534e-01, PNorm = 68.9029, GNorm = 0.7138, lr_0 = 5.9972e-04
Loss = 1.3831e-01, PNorm = 68.9202, GNorm = 0.8912, lr_0 = 5.9931e-04
Loss = 1.3431e-01, PNorm = 68.9421, GNorm = 0.5967, lr_0 = 5.9890e-04
Loss = 1.5114e-01, PNorm = 68.9592, GNorm = 1.0367, lr_0 = 5.9849e-04
Loss = 1.2896e-01, PNorm = 68.9742, GNorm = 0.6090, lr_0 = 5.9808e-04
Loss = 1.3962e-01, PNorm = 68.9890, GNorm = 0.8718, lr_0 = 5.9767e-04
Loss = 1.5059e-01, PNorm = 69.0036, GNorm = 0.7232, lr_0 = 5.9726e-04
Loss = 1.4627e-01, PNorm = 69.0188, GNorm = 0.6360, lr_0 = 5.9685e-04
Loss = 1.3912e-01, PNorm = 69.0399, GNorm = 0.7688, lr_0 = 5.9644e-04
Loss = 1.4085e-01, PNorm = 69.0559, GNorm = 0.8385, lr_0 = 5.9603e-04
Loss = 1.4376e-01, PNorm = 69.0759, GNorm = 0.5668, lr_0 = 5.9562e-04
Loss = 1.4802e-01, PNorm = 69.0943, GNorm = 1.2017, lr_0 = 5.9521e-04
Loss = 1.4780e-01, PNorm = 69.1127, GNorm = 0.8160, lr_0 = 5.9481e-04
Loss = 1.7171e-01, PNorm = 69.1326, GNorm = 1.0015, lr_0 = 5.9440e-04
Loss = 1.5607e-01, PNorm = 69.1525, GNorm = 0.7301, lr_0 = 5.9399e-04
Loss = 1.6060e-01, PNorm = 69.1771, GNorm = 0.8732, lr_0 = 5.9358e-04
Loss = 1.3607e-01, PNorm = 69.2005, GNorm = 0.5650, lr_0 = 5.9318e-04
Loss = 1.4745e-01, PNorm = 69.2226, GNorm = 2.1665, lr_0 = 5.9277e-04
Loss = 1.5513e-01, PNorm = 69.2363, GNorm = 0.9470, lr_0 = 5.9236e-04
Loss = 1.3870e-01, PNorm = 69.2611, GNorm = 0.7326, lr_0 = 5.9196e-04
Loss = 1.5886e-01, PNorm = 69.2793, GNorm = 0.8716, lr_0 = 5.9155e-04
Loss = 1.5343e-01, PNorm = 69.2976, GNorm = 1.1421, lr_0 = 5.9115e-04
Loss = 1.3587e-01, PNorm = 69.3161, GNorm = 0.8563, lr_0 = 5.9074e-04
Loss = 1.4863e-01, PNorm = 69.3338, GNorm = 0.6684, lr_0 = 5.9034e-04
Loss = 1.3671e-01, PNorm = 69.3494, GNorm = 0.8179, lr_0 = 5.8993e-04
Loss = 1.4867e-01, PNorm = 69.3676, GNorm = 0.5870, lr_0 = 5.8953e-04
Loss = 1.4781e-01, PNorm = 69.3929, GNorm = 0.6790, lr_0 = 5.8913e-04
Loss = 1.3599e-01, PNorm = 69.4119, GNorm = 0.8500, lr_0 = 5.8872e-04
Loss = 1.3373e-01, PNorm = 69.4309, GNorm = 0.5598, lr_0 = 5.8832e-04
Loss = 1.2857e-01, PNorm = 69.4496, GNorm = 0.7249, lr_0 = 5.8792e-04
Loss = 1.3546e-01, PNorm = 69.4656, GNorm = 1.1551, lr_0 = 5.8751e-04
Loss = 1.5609e-01, PNorm = 69.4905, GNorm = 0.4868, lr_0 = 5.8711e-04
Loss = 1.4219e-01, PNorm = 69.5157, GNorm = 0.7210, lr_0 = 5.8671e-04
Loss = 1.4169e-01, PNorm = 69.5338, GNorm = 0.9381, lr_0 = 5.8631e-04
Loss = 1.4490e-01, PNorm = 69.5529, GNorm = 0.5616, lr_0 = 5.8591e-04
Loss = 1.3395e-01, PNorm = 69.5678, GNorm = 0.9039, lr_0 = 5.8550e-04
Loss = 1.3258e-01, PNorm = 69.5780, GNorm = 0.5304, lr_0 = 5.8510e-04
Loss = 1.2825e-01, PNorm = 69.5935, GNorm = 0.5357, lr_0 = 5.8470e-04
Loss = 1.4401e-01, PNorm = 69.6106, GNorm = 0.7010, lr_0 = 5.8430e-04
Loss = 1.4611e-01, PNorm = 69.6378, GNorm = 0.8690, lr_0 = 5.8390e-04
Loss = 1.4725e-01, PNorm = 69.6594, GNorm = 0.8573, lr_0 = 5.8350e-04
Loss = 1.6474e-01, PNorm = 69.6810, GNorm = 0.6644, lr_0 = 5.8310e-04
Loss = 1.6030e-01, PNorm = 69.7004, GNorm = 0.7395, lr_0 = 5.8270e-04
Loss = 1.4361e-01, PNorm = 69.7187, GNorm = 0.7306, lr_0 = 5.8230e-04
Loss = 1.3884e-01, PNorm = 69.7381, GNorm = 0.6732, lr_0 = 5.8190e-04
Loss = 1.2598e-01, PNorm = 69.7556, GNorm = 0.5954, lr_0 = 5.8151e-04
Loss = 1.4196e-01, PNorm = 69.7747, GNorm = 0.5780, lr_0 = 5.8111e-04
Loss = 1.3887e-01, PNorm = 69.7878, GNorm = 0.7186, lr_0 = 5.8071e-04
Loss = 1.5345e-01, PNorm = 69.8021, GNorm = 0.5810, lr_0 = 5.8031e-04
Loss = 1.5788e-01, PNorm = 69.8254, GNorm = 1.0295, lr_0 = 5.7991e-04
Loss = 1.5375e-01, PNorm = 69.8451, GNorm = 1.1859, lr_0 = 5.7952e-04
Loss = 1.3890e-01, PNorm = 69.8638, GNorm = 1.3226, lr_0 = 5.7912e-04
Loss = 1.4300e-01, PNorm = 69.8821, GNorm = 1.2301, lr_0 = 5.7872e-04
Loss = 1.5263e-01, PNorm = 69.9013, GNorm = 0.7737, lr_0 = 5.7833e-04
Loss = 1.2640e-01, PNorm = 69.9177, GNorm = 0.5465, lr_0 = 5.7793e-04
Loss = 1.4311e-01, PNorm = 69.9338, GNorm = 1.0875, lr_0 = 5.7753e-04
Loss = 1.3937e-01, PNorm = 69.9537, GNorm = 0.6323, lr_0 = 5.7714e-04
Loss = 1.3438e-01, PNorm = 69.9784, GNorm = 0.6379, lr_0 = 5.7674e-04
Loss = 1.5191e-01, PNorm = 69.9999, GNorm = 0.6478, lr_0 = 5.7635e-04
Loss = 1.3496e-01, PNorm = 70.0153, GNorm = 0.8750, lr_0 = 5.7595e-04
Loss = 1.5488e-01, PNorm = 70.0306, GNorm = 0.6379, lr_0 = 5.7556e-04
Loss = 1.3719e-01, PNorm = 70.0506, GNorm = 0.6196, lr_0 = 5.7516e-04
Loss = 1.4832e-01, PNorm = 70.0745, GNorm = 0.5955, lr_0 = 5.7477e-04
Loss = 1.4340e-01, PNorm = 70.0977, GNorm = 1.1798, lr_0 = 5.7438e-04
Loss = 1.5336e-01, PNorm = 70.1193, GNorm = 0.5866, lr_0 = 5.7398e-04
Loss = 1.8984e-01, PNorm = 70.1347, GNorm = 0.9326, lr_0 = 5.7359e-04
Loss = 1.6060e-01, PNorm = 70.1563, GNorm = 0.9094, lr_0 = 5.7320e-04
Loss = 1.4957e-01, PNorm = 70.1699, GNorm = 0.5226, lr_0 = 5.7280e-04
Loss = 1.5977e-01, PNorm = 70.1851, GNorm = 0.8167, lr_0 = 5.7241e-04
Loss = 1.4061e-01, PNorm = 70.1955, GNorm = 0.6237, lr_0 = 5.7202e-04
Loss = 1.6301e-01, PNorm = 70.2076, GNorm = 0.9900, lr_0 = 5.7163e-04
Loss = 1.3987e-01, PNorm = 70.2264, GNorm = 1.1126, lr_0 = 5.7124e-04
Loss = 1.5312e-01, PNorm = 70.2504, GNorm = 0.7633, lr_0 = 5.7084e-04
Loss = 1.4635e-01, PNorm = 70.2762, GNorm = 0.6641, lr_0 = 5.7045e-04
Loss = 1.3683e-01, PNorm = 70.2942, GNorm = 1.2819, lr_0 = 5.7006e-04
Loss = 1.4045e-01, PNorm = 70.3062, GNorm = 0.7533, lr_0 = 5.6967e-04
Loss = 1.5345e-01, PNorm = 70.3252, GNorm = 0.8194, lr_0 = 5.6928e-04
Loss = 1.7498e-01, PNorm = 70.3433, GNorm = 0.5765, lr_0 = 5.6889e-04
Loss = 1.7249e-01, PNorm = 70.3656, GNorm = 2.1250, lr_0 = 5.6850e-04
Loss = 1.6352e-01, PNorm = 70.3915, GNorm = 0.9094, lr_0 = 5.6811e-04
Loss = 1.6768e-01, PNorm = 70.4204, GNorm = 0.8510, lr_0 = 5.6772e-04
Loss = 1.5267e-01, PNorm = 70.4394, GNorm = 0.5518, lr_0 = 5.6733e-04
Loss = 1.4726e-01, PNorm = 70.4556, GNorm = 1.0357, lr_0 = 5.6695e-04
Loss = 1.5306e-01, PNorm = 70.4696, GNorm = 0.6623, lr_0 = 5.6656e-04
Loss = 1.4175e-01, PNorm = 70.4888, GNorm = 0.8673, lr_0 = 5.6617e-04
Loss = 1.7684e-01, PNorm = 70.5070, GNorm = 0.7853, lr_0 = 5.6578e-04
Loss = 1.4717e-01, PNorm = 70.5247, GNorm = 0.8673, lr_0 = 5.6539e-04
Loss = 1.6948e-01, PNorm = 70.5434, GNorm = 1.7133, lr_0 = 5.6501e-04
Loss = 1.4198e-01, PNorm = 70.5612, GNorm = 0.6722, lr_0 = 5.6462e-04
Loss = 1.4747e-01, PNorm = 70.5839, GNorm = 1.1213, lr_0 = 5.6423e-04
Loss = 1.5003e-01, PNorm = 70.6041, GNorm = 0.5586, lr_0 = 5.6385e-04
Loss = 1.6007e-01, PNorm = 70.6216, GNorm = 1.1636, lr_0 = 5.6346e-04
Loss = 1.5471e-01, PNorm = 70.6419, GNorm = 0.7226, lr_0 = 5.6307e-04
Loss = 1.6398e-01, PNorm = 70.6653, GNorm = 1.6849, lr_0 = 5.6269e-04
Loss = 1.3196e-01, PNorm = 70.6818, GNorm = 1.4601, lr_0 = 5.6230e-04
Validation mae = 0.250962
Epoch 9
Loss = 1.4187e-01, PNorm = 70.6931, GNorm = 0.6094, lr_0 = 5.6192e-04
Loss = 1.4151e-01, PNorm = 70.7117, GNorm = 0.7844, lr_0 = 5.6153e-04
Loss = 1.2785e-01, PNorm = 70.7284, GNorm = 0.7289, lr_0 = 5.6115e-04
Loss = 1.2868e-01, PNorm = 70.7414, GNorm = 0.8513, lr_0 = 5.6076e-04
Loss = 1.3220e-01, PNorm = 70.7501, GNorm = 0.5063, lr_0 = 5.6038e-04
Loss = 1.4996e-01, PNorm = 70.7709, GNorm = 0.8076, lr_0 = 5.6000e-04
Loss = 1.2969e-01, PNorm = 70.7944, GNorm = 1.2596, lr_0 = 5.5961e-04
Loss = 1.4978e-01, PNorm = 70.8111, GNorm = 0.9733, lr_0 = 5.5923e-04
Loss = 1.4829e-01, PNorm = 70.8285, GNorm = 0.8942, lr_0 = 5.5885e-04
Loss = 1.2399e-01, PNorm = 70.8429, GNorm = 0.5695, lr_0 = 5.5846e-04
Loss = 1.4085e-01, PNorm = 70.8625, GNorm = 0.8308, lr_0 = 5.5808e-04
Loss = 1.1545e-01, PNorm = 70.8772, GNorm = 0.8835, lr_0 = 5.5770e-04
Loss = 1.4651e-01, PNorm = 70.8977, GNorm = 1.2139, lr_0 = 5.5732e-04
Loss = 1.4801e-01, PNorm = 70.9136, GNorm = 0.5625, lr_0 = 5.5693e-04
Loss = 1.1914e-01, PNorm = 70.9360, GNorm = 0.6461, lr_0 = 5.5655e-04
Loss = 1.4294e-01, PNorm = 70.9591, GNorm = 0.9171, lr_0 = 5.5617e-04
Loss = 1.3065e-01, PNorm = 70.9788, GNorm = 0.4613, lr_0 = 5.5579e-04
Loss = 1.2827e-01, PNorm = 70.9999, GNorm = 1.1099, lr_0 = 5.5541e-04
Loss = 1.2580e-01, PNorm = 71.0161, GNorm = 0.5104, lr_0 = 5.5503e-04
Loss = 1.2846e-01, PNorm = 71.0299, GNorm = 0.5544, lr_0 = 5.5465e-04
Loss = 1.3848e-01, PNorm = 71.0393, GNorm = 0.7234, lr_0 = 5.5427e-04
Loss = 1.5925e-01, PNorm = 71.0581, GNorm = 0.9317, lr_0 = 5.5389e-04
Loss = 1.2645e-01, PNorm = 71.0765, GNorm = 0.6834, lr_0 = 5.5351e-04
Loss = 1.5019e-01, PNorm = 71.0986, GNorm = 1.2827, lr_0 = 5.5313e-04
Loss = 1.4808e-01, PNorm = 71.1215, GNorm = 0.6535, lr_0 = 5.5275e-04
Loss = 1.4759e-01, PNorm = 71.1368, GNorm = 0.5794, lr_0 = 5.5237e-04
Loss = 1.5326e-01, PNorm = 71.1506, GNorm = 0.6593, lr_0 = 5.5199e-04
Loss = 1.4200e-01, PNorm = 71.1669, GNorm = 1.5705, lr_0 = 5.5162e-04
Loss = 1.4895e-01, PNorm = 71.1830, GNorm = 1.1096, lr_0 = 5.5124e-04
Loss = 1.3946e-01, PNorm = 71.2016, GNorm = 0.5795, lr_0 = 5.5086e-04
Loss = 1.3915e-01, PNorm = 71.2210, GNorm = 0.9374, lr_0 = 5.5048e-04
Loss = 1.3573e-01, PNorm = 71.2387, GNorm = 0.7669, lr_0 = 5.5011e-04
Loss = 1.2463e-01, PNorm = 71.2570, GNorm = 1.2662, lr_0 = 5.4973e-04
Loss = 1.3778e-01, PNorm = 71.2746, GNorm = 0.9125, lr_0 = 5.4935e-04
Loss = 1.4800e-01, PNorm = 71.2966, GNorm = 1.2140, lr_0 = 5.4898e-04
Loss = 1.2489e-01, PNorm = 71.3157, GNorm = 0.6017, lr_0 = 5.4860e-04
Loss = 1.2796e-01, PNorm = 71.3300, GNorm = 0.5262, lr_0 = 5.4822e-04
Loss = 1.4352e-01, PNorm = 71.3431, GNorm = 0.6560, lr_0 = 5.4785e-04
Loss = 1.4030e-01, PNorm = 71.3568, GNorm = 0.6414, lr_0 = 5.4747e-04
Loss = 1.5225e-01, PNorm = 71.3702, GNorm = 0.7375, lr_0 = 5.4710e-04
Loss = 1.0947e-01, PNorm = 71.3910, GNorm = 0.6290, lr_0 = 5.4672e-04
Loss = 1.4311e-01, PNorm = 71.4046, GNorm = 0.4893, lr_0 = 5.4635e-04
Loss = 1.4151e-01, PNorm = 71.4218, GNorm = 1.0351, lr_0 = 5.4597e-04
Loss = 1.4389e-01, PNorm = 71.4381, GNorm = 0.6944, lr_0 = 5.4560e-04
Loss = 1.4148e-01, PNorm = 71.4543, GNorm = 0.8696, lr_0 = 5.4523e-04
Loss = 1.5426e-01, PNorm = 71.4704, GNorm = 0.8489, lr_0 = 5.4485e-04
Loss = 1.4972e-01, PNorm = 71.4875, GNorm = 0.6652, lr_0 = 5.4448e-04
Loss = 1.3961e-01, PNorm = 71.5074, GNorm = 0.8263, lr_0 = 5.4411e-04
Loss = 1.3010e-01, PNorm = 71.5251, GNorm = 0.9271, lr_0 = 5.4373e-04
Loss = 1.3987e-01, PNorm = 71.5334, GNorm = 0.6126, lr_0 = 5.4336e-04
Loss = 1.6048e-01, PNorm = 71.5451, GNorm = 1.0163, lr_0 = 5.4299e-04
Loss = 1.4512e-01, PNorm = 71.5614, GNorm = 1.0293, lr_0 = 5.4262e-04
Loss = 1.5231e-01, PNorm = 71.5806, GNorm = 1.2419, lr_0 = 5.4225e-04
Loss = 1.4472e-01, PNorm = 71.5919, GNorm = 0.7155, lr_0 = 5.4187e-04
Loss = 1.3929e-01, PNorm = 71.6109, GNorm = 0.6003, lr_0 = 5.4150e-04
Loss = 1.3738e-01, PNorm = 71.6292, GNorm = 0.8556, lr_0 = 5.4113e-04
Loss = 1.4278e-01, PNorm = 71.6454, GNorm = 0.9358, lr_0 = 5.4076e-04
Loss = 1.5465e-01, PNorm = 71.6620, GNorm = 1.1075, lr_0 = 5.4039e-04
Loss = 1.3755e-01, PNorm = 71.6756, GNorm = 0.8985, lr_0 = 5.4002e-04
Loss = 1.5678e-01, PNorm = 71.6912, GNorm = 0.9217, lr_0 = 5.3965e-04
Loss = 1.7338e-01, PNorm = 71.7124, GNorm = 0.8501, lr_0 = 5.3928e-04
Loss = 1.3587e-01, PNorm = 71.7275, GNorm = 0.4652, lr_0 = 5.3891e-04
Loss = 1.5563e-01, PNorm = 71.7485, GNorm = 0.5822, lr_0 = 5.3854e-04
Loss = 1.3767e-01, PNorm = 71.7637, GNorm = 0.5501, lr_0 = 5.3817e-04
Loss = 1.2648e-01, PNorm = 71.7905, GNorm = 0.9382, lr_0 = 5.3781e-04
Loss = 1.5370e-01, PNorm = 71.8095, GNorm = 0.6325, lr_0 = 5.3744e-04
Loss = 1.3196e-01, PNorm = 71.8289, GNorm = 0.9305, lr_0 = 5.3707e-04
Loss = 1.4710e-01, PNorm = 71.8443, GNorm = 0.5929, lr_0 = 5.3670e-04
Loss = 1.4161e-01, PNorm = 71.8684, GNorm = 0.6726, lr_0 = 5.3633e-04
Loss = 1.3068e-01, PNorm = 71.8889, GNorm = 0.8141, lr_0 = 5.3597e-04
Loss = 1.4382e-01, PNorm = 71.9004, GNorm = 0.6370, lr_0 = 5.3560e-04
Loss = 1.5082e-01, PNorm = 71.9116, GNorm = 0.8063, lr_0 = 5.3523e-04
Loss = 1.4090e-01, PNorm = 71.9269, GNorm = 0.4724, lr_0 = 5.3486e-04
Loss = 1.4933e-01, PNorm = 71.9437, GNorm = 1.0780, lr_0 = 5.3450e-04
Loss = 1.3507e-01, PNorm = 71.9522, GNorm = 0.7131, lr_0 = 5.3413e-04
Loss = 1.3438e-01, PNorm = 71.9672, GNorm = 0.6880, lr_0 = 5.3377e-04
Loss = 1.5057e-01, PNorm = 71.9793, GNorm = 0.6795, lr_0 = 5.3340e-04
Loss = 1.5775e-01, PNorm = 72.0008, GNorm = 0.9017, lr_0 = 5.3304e-04
Loss = 1.4448e-01, PNorm = 72.0243, GNorm = 0.9068, lr_0 = 5.3267e-04
Loss = 1.4464e-01, PNorm = 72.0511, GNorm = 0.7649, lr_0 = 5.3231e-04
Loss = 1.3588e-01, PNorm = 72.0630, GNorm = 0.6437, lr_0 = 5.3194e-04
Loss = 1.2218e-01, PNorm = 72.0741, GNorm = 0.7382, lr_0 = 5.3158e-04
Loss = 1.3947e-01, PNorm = 72.0869, GNorm = 0.5265, lr_0 = 5.3121e-04
Loss = 1.3495e-01, PNorm = 72.1081, GNorm = 0.8613, lr_0 = 5.3085e-04
Loss = 1.3891e-01, PNorm = 72.1180, GNorm = 0.8439, lr_0 = 5.3048e-04
Loss = 1.2494e-01, PNorm = 72.1311, GNorm = 0.7461, lr_0 = 5.3012e-04
Loss = 1.5921e-01, PNorm = 72.1438, GNorm = 0.9816, lr_0 = 5.2976e-04
Loss = 1.3889e-01, PNorm = 72.1634, GNorm = 0.6707, lr_0 = 5.2939e-04
Loss = 1.2845e-01, PNorm = 72.1683, GNorm = 0.5285, lr_0 = 5.2903e-04
Loss = 1.3243e-01, PNorm = 72.1764, GNorm = 0.7450, lr_0 = 5.2867e-04
Loss = 1.2845e-01, PNorm = 72.1875, GNorm = 0.6677, lr_0 = 5.2831e-04
Loss = 1.3162e-01, PNorm = 72.1983, GNorm = 0.8217, lr_0 = 5.2795e-04
Loss = 1.5152e-01, PNorm = 72.2097, GNorm = 0.5277, lr_0 = 5.2758e-04
Loss = 1.3988e-01, PNorm = 72.2249, GNorm = 0.5176, lr_0 = 5.2722e-04
Loss = 1.4244e-01, PNorm = 72.2417, GNorm = 1.3784, lr_0 = 5.2686e-04
Loss = 1.5116e-01, PNorm = 72.2587, GNorm = 0.8214, lr_0 = 5.2650e-04
Loss = 1.4826e-01, PNorm = 72.2788, GNorm = 0.7280, lr_0 = 5.2614e-04
Loss = 1.4813e-01, PNorm = 72.2975, GNorm = 0.5619, lr_0 = 5.2578e-04
Loss = 1.4524e-01, PNorm = 72.3158, GNorm = 1.0362, lr_0 = 5.2542e-04
Loss = 1.4456e-01, PNorm = 72.3294, GNorm = 0.5759, lr_0 = 5.2506e-04
Loss = 1.3733e-01, PNorm = 72.3483, GNorm = 0.6617, lr_0 = 5.2470e-04
Loss = 1.1731e-01, PNorm = 72.3629, GNorm = 0.8400, lr_0 = 5.2434e-04
Loss = 1.3894e-01, PNorm = 72.3767, GNorm = 0.7111, lr_0 = 5.2398e-04
Loss = 1.3994e-01, PNorm = 72.3920, GNorm = 0.7508, lr_0 = 5.2362e-04
Loss = 1.1823e-01, PNorm = 72.4050, GNorm = 1.0374, lr_0 = 5.2326e-04
Loss = 1.3896e-01, PNorm = 72.4237, GNorm = 0.7806, lr_0 = 5.2290e-04
Loss = 1.5050e-01, PNorm = 72.4386, GNorm = 0.7972, lr_0 = 5.2255e-04
Loss = 1.4406e-01, PNorm = 72.4478, GNorm = 0.7215, lr_0 = 5.2219e-04
Loss = 1.6135e-01, PNorm = 72.4685, GNorm = 1.7413, lr_0 = 5.2183e-04
Loss = 1.4885e-01, PNorm = 72.4924, GNorm = 0.8540, lr_0 = 5.2147e-04
Loss = 1.3763e-01, PNorm = 72.5088, GNorm = 0.6777, lr_0 = 5.2112e-04
Loss = 1.3566e-01, PNorm = 72.5206, GNorm = 0.6449, lr_0 = 5.2076e-04
Loss = 1.6036e-01, PNorm = 72.5365, GNorm = 1.2252, lr_0 = 5.2040e-04
Loss = 1.3226e-01, PNorm = 72.5538, GNorm = 0.6900, lr_0 = 5.2005e-04
Loss = 1.3956e-01, PNorm = 72.5667, GNorm = 0.6039, lr_0 = 5.1969e-04
Loss = 1.4704e-01, PNorm = 72.5705, GNorm = 0.7330, lr_0 = 5.1933e-04
Loss = 1.4227e-01, PNorm = 72.5863, GNorm = 0.7866, lr_0 = 5.1898e-04
Loss = 1.4925e-01, PNorm = 72.6070, GNorm = 0.5639, lr_0 = 5.1862e-04
Loss = 1.3308e-01, PNorm = 72.6238, GNorm = 0.9901, lr_0 = 5.1827e-04
Loss = 1.3753e-01, PNorm = 72.6429, GNorm = 1.1157, lr_0 = 5.1791e-04
Validation mae = 0.240501
Epoch 10
Loss = 1.1598e-01, PNorm = 72.6675, GNorm = 0.6457, lr_0 = 5.1756e-04
Loss = 1.3476e-01, PNorm = 72.6798, GNorm = 0.7990, lr_0 = 5.1720e-04
Loss = 1.3166e-01, PNorm = 72.6951, GNorm = 0.8196, lr_0 = 5.1685e-04
Loss = 1.1277e-01, PNorm = 72.7073, GNorm = 1.2683, lr_0 = 5.1649e-04
Loss = 1.3724e-01, PNorm = 72.7234, GNorm = 1.1504, lr_0 = 5.1614e-04
Loss = 1.4464e-01, PNorm = 72.7401, GNorm = 0.8464, lr_0 = 5.1579e-04
Loss = 1.2113e-01, PNorm = 72.7560, GNorm = 0.5288, lr_0 = 5.1543e-04
Loss = 1.4203e-01, PNorm = 72.7718, GNorm = 0.6336, lr_0 = 5.1508e-04
Loss = 1.2503e-01, PNorm = 72.7891, GNorm = 0.8390, lr_0 = 5.1473e-04
Loss = 1.3758e-01, PNorm = 72.8014, GNorm = 0.6669, lr_0 = 5.1437e-04
Loss = 1.3221e-01, PNorm = 72.8156, GNorm = 0.7378, lr_0 = 5.1402e-04
Loss = 1.2605e-01, PNorm = 72.8263, GNorm = 1.1790, lr_0 = 5.1367e-04
Loss = 1.2738e-01, PNorm = 72.8402, GNorm = 0.6572, lr_0 = 5.1332e-04
Loss = 1.2217e-01, PNorm = 72.8551, GNorm = 0.8204, lr_0 = 5.1297e-04
Loss = 1.2346e-01, PNorm = 72.8724, GNorm = 0.6172, lr_0 = 5.1262e-04
Loss = 1.3473e-01, PNorm = 72.8890, GNorm = 0.8479, lr_0 = 5.1226e-04
Loss = 1.2934e-01, PNorm = 72.9030, GNorm = 0.7832, lr_0 = 5.1191e-04
Loss = 1.3551e-01, PNorm = 72.9188, GNorm = 1.1604, lr_0 = 5.1156e-04
Loss = 1.5100e-01, PNorm = 72.9373, GNorm = 0.6516, lr_0 = 5.1121e-04
Loss = 1.1760e-01, PNorm = 72.9529, GNorm = 0.5471, lr_0 = 5.1086e-04
Loss = 1.6690e-01, PNorm = 72.9693, GNorm = 0.6669, lr_0 = 5.1051e-04
Loss = 1.2584e-01, PNorm = 72.9851, GNorm = 0.7590, lr_0 = 5.1016e-04
Loss = 1.2988e-01, PNorm = 73.0061, GNorm = 0.8104, lr_0 = 5.0981e-04
Loss = 1.3271e-01, PNorm = 73.0253, GNorm = 0.7617, lr_0 = 5.0946e-04
Loss = 1.3661e-01, PNorm = 73.0476, GNorm = 0.6503, lr_0 = 5.0911e-04
Loss = 1.2615e-01, PNorm = 73.0634, GNorm = 0.9015, lr_0 = 5.0877e-04
Loss = 1.2108e-01, PNorm = 73.0747, GNorm = 0.8890, lr_0 = 5.0842e-04
Loss = 1.4523e-01, PNorm = 73.0849, GNorm = 1.3026, lr_0 = 5.0807e-04
Loss = 1.4012e-01, PNorm = 73.1001, GNorm = 0.6124, lr_0 = 5.0772e-04
Loss = 1.3567e-01, PNorm = 73.1190, GNorm = 0.7311, lr_0 = 5.0737e-04
Loss = 1.2825e-01, PNorm = 73.1376, GNorm = 0.6123, lr_0 = 5.0703e-04
Loss = 1.2452e-01, PNorm = 73.1536, GNorm = 0.5746, lr_0 = 5.0668e-04
Loss = 1.2664e-01, PNorm = 73.1679, GNorm = 0.5863, lr_0 = 5.0633e-04
Loss = 1.2280e-01, PNorm = 73.1829, GNorm = 0.6522, lr_0 = 5.0598e-04
Loss = 1.2287e-01, PNorm = 73.1952, GNorm = 1.1171, lr_0 = 5.0564e-04
Loss = 1.3616e-01, PNorm = 73.2076, GNorm = 0.7591, lr_0 = 5.0529e-04
Loss = 1.2502e-01, PNorm = 73.2216, GNorm = 0.7235, lr_0 = 5.0494e-04
Loss = 1.3185e-01, PNorm = 73.2340, GNorm = 0.6834, lr_0 = 5.0460e-04
Loss = 1.1630e-01, PNorm = 73.2466, GNorm = 0.8356, lr_0 = 5.0425e-04
Loss = 1.3544e-01, PNorm = 73.2607, GNorm = 0.7635, lr_0 = 5.0391e-04
Loss = 1.2526e-01, PNorm = 73.2740, GNorm = 0.6191, lr_0 = 5.0356e-04
Loss = 1.3154e-01, PNorm = 73.2896, GNorm = 0.5198, lr_0 = 5.0322e-04
Loss = 1.5162e-01, PNorm = 73.3028, GNorm = 1.0826, lr_0 = 5.0287e-04
Loss = 1.3265e-01, PNorm = 73.3133, GNorm = 0.6927, lr_0 = 5.0253e-04
Loss = 1.3112e-01, PNorm = 73.3302, GNorm = 0.9936, lr_0 = 5.0218e-04
Loss = 1.3633e-01, PNorm = 73.3419, GNorm = 0.6704, lr_0 = 5.0184e-04
Loss = 1.5151e-01, PNorm = 73.3524, GNorm = 1.0238, lr_0 = 5.0150e-04
Loss = 1.3289e-01, PNorm = 73.3669, GNorm = 0.6493, lr_0 = 5.0115e-04
Loss = 1.1800e-01, PNorm = 73.3832, GNorm = 0.5974, lr_0 = 5.0081e-04
Loss = 1.2335e-01, PNorm = 73.3943, GNorm = 1.1425, lr_0 = 5.0047e-04
Loss = 1.5839e-01, PNorm = 73.4065, GNorm = 0.7582, lr_0 = 5.0012e-04
Loss = 1.3249e-01, PNorm = 73.4189, GNorm = 0.8120, lr_0 = 4.9978e-04
Loss = 1.4115e-01, PNorm = 73.4342, GNorm = 1.0341, lr_0 = 4.9944e-04
Loss = 1.4644e-01, PNorm = 73.4565, GNorm = 1.0347, lr_0 = 4.9910e-04
Loss = 1.3518e-01, PNorm = 73.4681, GNorm = 0.5422, lr_0 = 4.9875e-04
Loss = 1.3173e-01, PNorm = 73.4862, GNorm = 0.8225, lr_0 = 4.9841e-04
Loss = 1.1992e-01, PNorm = 73.5016, GNorm = 0.7153, lr_0 = 4.9807e-04
Loss = 1.2659e-01, PNorm = 73.5202, GNorm = 0.6923, lr_0 = 4.9773e-04
Loss = 1.3310e-01, PNorm = 73.5339, GNorm = 0.7421, lr_0 = 4.9739e-04
Loss = 1.3802e-01, PNorm = 73.5533, GNorm = 0.8427, lr_0 = 4.9705e-04
Loss = 1.2850e-01, PNorm = 73.5673, GNorm = 0.6282, lr_0 = 4.9671e-04
Loss = 1.3352e-01, PNorm = 73.5811, GNorm = 0.5665, lr_0 = 4.9637e-04
Loss = 1.3937e-01, PNorm = 73.5940, GNorm = 0.6047, lr_0 = 4.9603e-04
Loss = 1.2693e-01, PNorm = 73.6074, GNorm = 0.8537, lr_0 = 4.9569e-04
Loss = 1.4450e-01, PNorm = 73.6196, GNorm = 0.9423, lr_0 = 4.9535e-04
Loss = 1.3582e-01, PNorm = 73.6339, GNorm = 1.1956, lr_0 = 4.9501e-04
Loss = 1.4482e-01, PNorm = 73.6459, GNorm = 0.5666, lr_0 = 4.9467e-04
Loss = 1.2604e-01, PNorm = 73.6577, GNorm = 1.2946, lr_0 = 4.9433e-04
Loss = 1.2967e-01, PNorm = 73.6739, GNorm = 0.9133, lr_0 = 4.9399e-04
Loss = 1.2963e-01, PNorm = 73.6927, GNorm = 0.4943, lr_0 = 4.9365e-04
Loss = 1.3166e-01, PNorm = 73.7115, GNorm = 0.9540, lr_0 = 4.9332e-04
Loss = 1.2587e-01, PNorm = 73.7241, GNorm = 0.6850, lr_0 = 4.9298e-04
Loss = 1.4640e-01, PNorm = 73.7385, GNorm = 1.5435, lr_0 = 4.9264e-04
Loss = 1.3003e-01, PNorm = 73.7491, GNorm = 1.1453, lr_0 = 4.9230e-04
Loss = 1.5143e-01, PNorm = 73.7630, GNorm = 0.9197, lr_0 = 4.9197e-04
Loss = 1.2862e-01, PNorm = 73.7745, GNorm = 0.5850, lr_0 = 4.9163e-04
Loss = 1.2920e-01, PNorm = 73.7897, GNorm = 0.5663, lr_0 = 4.9129e-04
Loss = 1.3881e-01, PNorm = 73.8043, GNorm = 0.6861, lr_0 = 4.9095e-04
Loss = 1.3622e-01, PNorm = 73.8221, GNorm = 0.8644, lr_0 = 4.9062e-04
Loss = 1.4526e-01, PNorm = 73.8310, GNorm = 0.6567, lr_0 = 4.9028e-04
Loss = 1.2730e-01, PNorm = 73.8477, GNorm = 0.6421, lr_0 = 4.8995e-04
Loss = 1.2531e-01, PNorm = 73.8537, GNorm = 0.5210, lr_0 = 4.8961e-04
Loss = 1.3155e-01, PNorm = 73.8702, GNorm = 0.9093, lr_0 = 4.8928e-04
Loss = 1.3912e-01, PNorm = 73.8847, GNorm = 1.1255, lr_0 = 4.8894e-04
Loss = 1.2263e-01, PNorm = 73.8998, GNorm = 0.7869, lr_0 = 4.8861e-04
Loss = 1.3013e-01, PNorm = 73.9149, GNorm = 0.5673, lr_0 = 4.8827e-04
Loss = 1.3813e-01, PNorm = 73.9336, GNorm = 0.6081, lr_0 = 4.8794e-04
Loss = 1.2956e-01, PNorm = 73.9488, GNorm = 0.5480, lr_0 = 4.8760e-04
Loss = 1.3332e-01, PNorm = 73.9613, GNorm = 1.0634, lr_0 = 4.8727e-04
Loss = 1.4559e-01, PNorm = 73.9764, GNorm = 0.6875, lr_0 = 4.8693e-04
Loss = 1.5166e-01, PNorm = 73.9885, GNorm = 0.8150, lr_0 = 4.8660e-04
Loss = 1.4381e-01, PNorm = 74.0028, GNorm = 0.4624, lr_0 = 4.8627e-04
Loss = 1.2510e-01, PNorm = 74.0183, GNorm = 1.1891, lr_0 = 4.8593e-04
Loss = 1.4689e-01, PNorm = 74.0371, GNorm = 0.7282, lr_0 = 4.8560e-04
Loss = 1.2917e-01, PNorm = 74.0517, GNorm = 0.6867, lr_0 = 4.8527e-04
Loss = 1.3839e-01, PNorm = 74.0648, GNorm = 0.6448, lr_0 = 4.8494e-04
Loss = 1.3909e-01, PNorm = 74.0780, GNorm = 1.5919, lr_0 = 4.8460e-04
Loss = 1.4330e-01, PNorm = 74.0957, GNorm = 0.7859, lr_0 = 4.8427e-04
Loss = 1.2813e-01, PNorm = 74.1137, GNorm = 0.6484, lr_0 = 4.8394e-04
Loss = 1.3182e-01, PNorm = 74.1246, GNorm = 0.6017, lr_0 = 4.8361e-04
Loss = 1.3794e-01, PNorm = 74.1343, GNorm = 0.6933, lr_0 = 4.8328e-04
Loss = 1.2778e-01, PNorm = 74.1458, GNorm = 0.7851, lr_0 = 4.8295e-04
Loss = 1.4179e-01, PNorm = 74.1559, GNorm = 0.7602, lr_0 = 4.8262e-04
Loss = 1.2606e-01, PNorm = 74.1671, GNorm = 0.8564, lr_0 = 4.8228e-04
Loss = 1.1928e-01, PNorm = 74.1780, GNorm = 1.1997, lr_0 = 4.8195e-04
Loss = 1.4725e-01, PNorm = 74.1911, GNorm = 0.9453, lr_0 = 4.8162e-04
Loss = 1.2452e-01, PNorm = 74.2025, GNorm = 0.5702, lr_0 = 4.8129e-04
Loss = 1.4645e-01, PNorm = 74.2182, GNorm = 0.6130, lr_0 = 4.8096e-04
Loss = 1.3409e-01, PNorm = 74.2322, GNorm = 0.6447, lr_0 = 4.8064e-04
Loss = 1.2928e-01, PNorm = 74.2458, GNorm = 0.5403, lr_0 = 4.8031e-04
Loss = 1.3362e-01, PNorm = 74.2602, GNorm = 0.6397, lr_0 = 4.7998e-04
Loss = 1.3960e-01, PNorm = 74.2727, GNorm = 0.5700, lr_0 = 4.7965e-04
Loss = 1.4204e-01, PNorm = 74.2867, GNorm = 0.7411, lr_0 = 4.7932e-04
Loss = 1.3519e-01, PNorm = 74.3089, GNorm = 0.9370, lr_0 = 4.7899e-04
Loss = 1.3977e-01, PNorm = 74.3226, GNorm = 0.9544, lr_0 = 4.7866e-04
Loss = 1.2807e-01, PNorm = 74.3291, GNorm = 0.5761, lr_0 = 4.7833e-04
Loss = 1.4307e-01, PNorm = 74.3415, GNorm = 0.6687, lr_0 = 4.7801e-04
Loss = 1.4474e-01, PNorm = 74.3486, GNorm = 0.7577, lr_0 = 4.7768e-04
Loss = 1.3500e-01, PNorm = 74.3662, GNorm = 0.8357, lr_0 = 4.7735e-04
Loss = 1.2929e-01, PNorm = 74.3866, GNorm = 0.5503, lr_0 = 4.7703e-04
Validation mae = 0.233599
Epoch 11
Loss = 1.1397e-01, PNorm = 74.4050, GNorm = 0.5364, lr_0 = 4.7670e-04
Loss = 1.1475e-01, PNorm = 74.4211, GNorm = 0.5688, lr_0 = 4.7637e-04
Loss = 1.4149e-01, PNorm = 74.4303, GNorm = 0.6961, lr_0 = 4.7605e-04
Loss = 1.2323e-01, PNorm = 74.4431, GNorm = 0.6210, lr_0 = 4.7572e-04
Loss = 1.2332e-01, PNorm = 74.4608, GNorm = 0.5354, lr_0 = 4.7539e-04
Loss = 1.1828e-01, PNorm = 74.4690, GNorm = 0.7386, lr_0 = 4.7507e-04
Loss = 1.1934e-01, PNorm = 74.4801, GNorm = 0.7761, lr_0 = 4.7474e-04
Loss = 1.3803e-01, PNorm = 74.4923, GNorm = 1.1649, lr_0 = 4.7442e-04
Loss = 1.2699e-01, PNorm = 74.5140, GNorm = 1.0062, lr_0 = 4.7409e-04
Loss = 1.2801e-01, PNorm = 74.5302, GNorm = 0.7611, lr_0 = 4.7377e-04
Loss = 1.4331e-01, PNorm = 74.5449, GNorm = 1.0264, lr_0 = 4.7344e-04
Loss = 1.2298e-01, PNorm = 74.5569, GNorm = 0.6221, lr_0 = 4.7312e-04
Loss = 1.3238e-01, PNorm = 74.5740, GNorm = 0.5975, lr_0 = 4.7279e-04
Loss = 1.2161e-01, PNorm = 74.5902, GNorm = 0.6037, lr_0 = 4.7247e-04
Loss = 1.2928e-01, PNorm = 74.6032, GNorm = 0.5950, lr_0 = 4.7215e-04
Loss = 1.3004e-01, PNorm = 74.6162, GNorm = 0.6827, lr_0 = 4.7182e-04
Loss = 1.1639e-01, PNorm = 74.6339, GNorm = 0.5465, lr_0 = 4.7150e-04
Loss = 1.2294e-01, PNorm = 74.6463, GNorm = 0.5927, lr_0 = 4.7118e-04
Loss = 1.2955e-01, PNorm = 74.6583, GNorm = 1.0800, lr_0 = 4.7085e-04
Loss = 1.2684e-01, PNorm = 74.6692, GNorm = 1.0351, lr_0 = 4.7053e-04
Loss = 1.2047e-01, PNorm = 74.6832, GNorm = 0.6181, lr_0 = 4.7021e-04
Loss = 1.5487e-01, PNorm = 74.7027, GNorm = 0.6171, lr_0 = 4.6989e-04
Loss = 1.1718e-01, PNorm = 74.7203, GNorm = 0.6898, lr_0 = 4.6957e-04
Loss = 1.3542e-01, PNorm = 74.7365, GNorm = 0.7858, lr_0 = 4.6924e-04
Loss = 1.1199e-01, PNorm = 74.7508, GNorm = 0.8120, lr_0 = 4.6892e-04
Loss = 1.2705e-01, PNorm = 74.7628, GNorm = 0.6270, lr_0 = 4.6860e-04
Loss = 1.4042e-01, PNorm = 74.7805, GNorm = 0.6552, lr_0 = 4.6828e-04
Loss = 1.2752e-01, PNorm = 74.7943, GNorm = 0.8776, lr_0 = 4.6796e-04
Loss = 1.2972e-01, PNorm = 74.8115, GNorm = 0.6753, lr_0 = 4.6764e-04
Loss = 1.3256e-01, PNorm = 74.8298, GNorm = 0.8257, lr_0 = 4.6732e-04
Loss = 1.1303e-01, PNorm = 74.8362, GNorm = 0.6585, lr_0 = 4.6700e-04
Loss = 1.3071e-01, PNorm = 74.8446, GNorm = 0.9666, lr_0 = 4.6668e-04
Loss = 1.2507e-01, PNorm = 74.8540, GNorm = 0.8135, lr_0 = 4.6636e-04
Loss = 1.1446e-01, PNorm = 74.8598, GNorm = 0.6439, lr_0 = 4.6604e-04
Loss = 1.3327e-01, PNorm = 74.8677, GNorm = 0.6924, lr_0 = 4.6572e-04
Loss = 1.3266e-01, PNorm = 74.8814, GNorm = 0.6008, lr_0 = 4.6540e-04
Loss = 1.3361e-01, PNorm = 74.8952, GNorm = 0.5916, lr_0 = 4.6508e-04
Loss = 1.5153e-01, PNorm = 74.9124, GNorm = 0.8822, lr_0 = 4.6476e-04
Loss = 1.3025e-01, PNorm = 74.9263, GNorm = 0.8759, lr_0 = 4.6445e-04
Loss = 1.3354e-01, PNorm = 74.9402, GNorm = 0.5887, lr_0 = 4.6413e-04
Loss = 1.0633e-01, PNorm = 74.9559, GNorm = 0.6764, lr_0 = 4.6381e-04
Loss = 1.1021e-01, PNorm = 74.9679, GNorm = 0.7234, lr_0 = 4.6349e-04
Loss = 1.4394e-01, PNorm = 74.9771, GNorm = 0.6136, lr_0 = 4.6317e-04
Loss = 1.2953e-01, PNorm = 74.9938, GNorm = 0.9093, lr_0 = 4.6286e-04
Loss = 1.1668e-01, PNorm = 75.0052, GNorm = 0.7021, lr_0 = 4.6254e-04
Loss = 1.2776e-01, PNorm = 75.0183, GNorm = 1.1436, lr_0 = 4.6222e-04
Loss = 1.2103e-01, PNorm = 75.0180, GNorm = 0.6907, lr_0 = 4.6191e-04
Loss = 1.2873e-01, PNorm = 75.0300, GNorm = 0.8486, lr_0 = 4.6159e-04
Loss = 1.3245e-01, PNorm = 75.0408, GNorm = 0.6804, lr_0 = 4.6127e-04
Loss = 1.1981e-01, PNorm = 75.0561, GNorm = 0.6748, lr_0 = 4.6096e-04
Loss = 1.3960e-01, PNorm = 75.0748, GNorm = 1.0689, lr_0 = 4.6064e-04
Loss = 1.2236e-01, PNorm = 75.0919, GNorm = 0.7396, lr_0 = 4.6033e-04
Loss = 1.2925e-01, PNorm = 75.1012, GNorm = 0.5792, lr_0 = 4.6001e-04
Loss = 1.3087e-01, PNorm = 75.1191, GNorm = 0.8409, lr_0 = 4.5970e-04
Loss = 1.3852e-01, PNorm = 75.1383, GNorm = 0.7302, lr_0 = 4.5938e-04
Loss = 1.2853e-01, PNorm = 75.1513, GNorm = 0.5319, lr_0 = 4.5907e-04
Loss = 1.2418e-01, PNorm = 75.1628, GNorm = 0.6313, lr_0 = 4.5875e-04
Loss = 1.3296e-01, PNorm = 75.1746, GNorm = 0.6599, lr_0 = 4.5844e-04
Loss = 1.1858e-01, PNorm = 75.1869, GNorm = 0.6739, lr_0 = 4.5812e-04
Loss = 1.1019e-01, PNorm = 75.1976, GNorm = 0.6758, lr_0 = 4.5781e-04
Loss = 1.3185e-01, PNorm = 75.2064, GNorm = 0.6504, lr_0 = 4.5750e-04
Loss = 1.3699e-01, PNorm = 75.2143, GNorm = 1.0629, lr_0 = 4.5718e-04
Loss = 1.2382e-01, PNorm = 75.2220, GNorm = 0.6478, lr_0 = 4.5687e-04
Loss = 1.2330e-01, PNorm = 75.2295, GNorm = 0.6524, lr_0 = 4.5656e-04
Loss = 1.3569e-01, PNorm = 75.2417, GNorm = 0.7302, lr_0 = 4.5624e-04
Loss = 1.3637e-01, PNorm = 75.2506, GNorm = 0.6754, lr_0 = 4.5593e-04
Loss = 1.2270e-01, PNorm = 75.2601, GNorm = 0.7931, lr_0 = 4.5562e-04
Loss = 1.2191e-01, PNorm = 75.2724, GNorm = 0.6538, lr_0 = 4.5531e-04
Loss = 1.2701e-01, PNorm = 75.2843, GNorm = 0.7258, lr_0 = 4.5499e-04
Loss = 1.1772e-01, PNorm = 75.2978, GNorm = 0.5006, lr_0 = 4.5468e-04
Loss = 1.1703e-01, PNorm = 75.3129, GNorm = 0.7273, lr_0 = 4.5437e-04
Loss = 1.1884e-01, PNorm = 75.3251, GNorm = 0.5418, lr_0 = 4.5406e-04
Loss = 1.2256e-01, PNorm = 75.3345, GNorm = 0.6075, lr_0 = 4.5375e-04
Loss = 1.3450e-01, PNorm = 75.3435, GNorm = 0.6179, lr_0 = 4.5344e-04
Loss = 1.1501e-01, PNorm = 75.3560, GNorm = 0.9529, lr_0 = 4.5313e-04
Loss = 1.1353e-01, PNorm = 75.3707, GNorm = 0.6482, lr_0 = 4.5282e-04
Loss = 1.2323e-01, PNorm = 75.3832, GNorm = 0.9778, lr_0 = 4.5251e-04
Loss = 1.5821e-01, PNorm = 75.3921, GNorm = 1.5739, lr_0 = 4.5220e-04
Loss = 1.4545e-01, PNorm = 75.4015, GNorm = 1.0758, lr_0 = 4.5189e-04
Loss = 1.4375e-01, PNorm = 75.4139, GNorm = 0.6107, lr_0 = 4.5158e-04
Loss = 1.3752e-01, PNorm = 75.4339, GNorm = 1.0431, lr_0 = 4.5127e-04
Loss = 1.3994e-01, PNorm = 75.4417, GNorm = 0.6755, lr_0 = 4.5096e-04
Loss = 1.2074e-01, PNorm = 75.4623, GNorm = 0.8680, lr_0 = 4.5065e-04
Loss = 1.3633e-01, PNorm = 75.4860, GNorm = 0.7238, lr_0 = 4.5034e-04
Loss = 1.2412e-01, PNorm = 75.5021, GNorm = 0.6096, lr_0 = 4.5003e-04
Loss = 1.2320e-01, PNorm = 75.5171, GNorm = 1.7917, lr_0 = 4.4972e-04
Loss = 1.4030e-01, PNorm = 75.5333, GNorm = 0.8825, lr_0 = 4.4942e-04
Loss = 1.5041e-01, PNorm = 75.5518, GNorm = 1.1455, lr_0 = 4.4911e-04
Loss = 1.0847e-01, PNorm = 75.5654, GNorm = 0.5406, lr_0 = 4.4880e-04
Loss = 1.3417e-01, PNorm = 75.5727, GNorm = 0.6711, lr_0 = 4.4849e-04
Loss = 1.1681e-01, PNorm = 75.5798, GNorm = 1.0374, lr_0 = 4.4819e-04
Loss = 1.2534e-01, PNorm = 75.5959, GNorm = 0.7331, lr_0 = 4.4788e-04
Loss = 1.1147e-01, PNorm = 75.6015, GNorm = 1.0870, lr_0 = 4.4757e-04
Loss = 1.1107e-01, PNorm = 75.6120, GNorm = 0.8019, lr_0 = 4.4727e-04
Loss = 1.1643e-01, PNorm = 75.6206, GNorm = 0.8414, lr_0 = 4.4696e-04
Loss = 1.2609e-01, PNorm = 75.6291, GNorm = 0.8452, lr_0 = 4.4665e-04
Loss = 1.2942e-01, PNorm = 75.6366, GNorm = 0.9516, lr_0 = 4.4635e-04
Loss = 1.2755e-01, PNorm = 75.6477, GNorm = 0.7280, lr_0 = 4.4604e-04
Loss = 1.3875e-01, PNorm = 75.6661, GNorm = 0.7571, lr_0 = 4.4574e-04
Loss = 1.2175e-01, PNorm = 75.6834, GNorm = 0.7151, lr_0 = 4.4543e-04
Loss = 1.2999e-01, PNorm = 75.6937, GNorm = 0.6391, lr_0 = 4.4513e-04
Loss = 1.1689e-01, PNorm = 75.7052, GNorm = 0.8689, lr_0 = 4.4482e-04
Loss = 1.2029e-01, PNorm = 75.7167, GNorm = 0.9251, lr_0 = 4.4452e-04
Loss = 1.2742e-01, PNorm = 75.7275, GNorm = 0.6406, lr_0 = 4.4421e-04
Loss = 1.3260e-01, PNorm = 75.7393, GNorm = 0.5402, lr_0 = 4.4391e-04
Loss = 1.1494e-01, PNorm = 75.7507, GNorm = 0.5200, lr_0 = 4.4360e-04
Loss = 1.2864e-01, PNorm = 75.7603, GNorm = 0.6390, lr_0 = 4.4330e-04
Loss = 1.2140e-01, PNorm = 75.7684, GNorm = 0.5359, lr_0 = 4.4299e-04
Loss = 1.1475e-01, PNorm = 75.7832, GNorm = 0.7540, lr_0 = 4.4269e-04
Loss = 1.2463e-01, PNorm = 75.7907, GNorm = 0.5662, lr_0 = 4.4239e-04
Loss = 1.2472e-01, PNorm = 75.8005, GNorm = 0.7881, lr_0 = 4.4209e-04
Loss = 1.1420e-01, PNorm = 75.8134, GNorm = 0.6658, lr_0 = 4.4178e-04
Loss = 1.2300e-01, PNorm = 75.8285, GNorm = 0.5229, lr_0 = 4.4148e-04
Loss = 1.2492e-01, PNorm = 75.8413, GNorm = 0.6202, lr_0 = 4.4118e-04
Loss = 1.2715e-01, PNorm = 75.8554, GNorm = 0.9411, lr_0 = 4.4088e-04
Loss = 1.2276e-01, PNorm = 75.8631, GNorm = 1.1720, lr_0 = 4.4057e-04
Loss = 1.2739e-01, PNorm = 75.8735, GNorm = 0.7393, lr_0 = 4.4027e-04
Loss = 1.3525e-01, PNorm = 75.8826, GNorm = 0.4780, lr_0 = 4.3997e-04
Loss = 1.4530e-01, PNorm = 75.8920, GNorm = 0.7822, lr_0 = 4.3967e-04
Loss = 1.3084e-01, PNorm = 75.9018, GNorm = 0.9969, lr_0 = 4.3937e-04
Validation mae = 0.233978
Epoch 12
Loss = 1.3425e-01, PNorm = 75.9116, GNorm = 0.7761, lr_0 = 4.3907e-04
Loss = 1.2852e-01, PNorm = 75.9191, GNorm = 0.4613, lr_0 = 4.3877e-04
Loss = 1.3144e-01, PNorm = 75.9349, GNorm = 0.7949, lr_0 = 4.3846e-04
Loss = 1.2431e-01, PNorm = 75.9569, GNorm = 0.8190, lr_0 = 4.3816e-04
Loss = 1.1752e-01, PNorm = 75.9753, GNorm = 0.6507, lr_0 = 4.3786e-04
Loss = 1.3079e-01, PNorm = 75.9900, GNorm = 0.5732, lr_0 = 4.3756e-04
Loss = 1.2549e-01, PNorm = 76.0038, GNorm = 0.4816, lr_0 = 4.3726e-04
Loss = 1.1059e-01, PNorm = 76.0127, GNorm = 0.5630, lr_0 = 4.3696e-04
Loss = 1.1416e-01, PNorm = 76.0249, GNorm = 0.5097, lr_0 = 4.3667e-04
Loss = 1.2845e-01, PNorm = 76.0411, GNorm = 1.0386, lr_0 = 4.3637e-04
Loss = 1.2275e-01, PNorm = 76.0451, GNorm = 0.6433, lr_0 = 4.3607e-04
Loss = 1.1370e-01, PNorm = 76.0517, GNorm = 0.5365, lr_0 = 4.3577e-04
Loss = 1.0890e-01, PNorm = 76.0654, GNorm = 0.6469, lr_0 = 4.3547e-04
Loss = 1.2006e-01, PNorm = 76.0844, GNorm = 0.6222, lr_0 = 4.3517e-04
Loss = 1.1911e-01, PNorm = 76.0972, GNorm = 0.4362, lr_0 = 4.3487e-04
Loss = 1.2407e-01, PNorm = 76.1134, GNorm = 0.9000, lr_0 = 4.3458e-04
Loss = 1.2245e-01, PNorm = 76.1270, GNorm = 0.6878, lr_0 = 4.3428e-04
Loss = 1.3551e-01, PNorm = 76.1419, GNorm = 0.6525, lr_0 = 4.3398e-04
Loss = 1.0911e-01, PNorm = 76.1482, GNorm = 0.6760, lr_0 = 4.3368e-04
Loss = 1.0578e-01, PNorm = 76.1608, GNorm = 0.8261, lr_0 = 4.3339e-04
Loss = 1.2376e-01, PNorm = 76.1699, GNorm = 0.8141, lr_0 = 4.3309e-04
Loss = 1.3753e-01, PNorm = 76.1856, GNorm = 0.6366, lr_0 = 4.3279e-04
Loss = 1.2954e-01, PNorm = 76.1985, GNorm = 0.7220, lr_0 = 4.3250e-04
Loss = 1.0516e-01, PNorm = 76.2127, GNorm = 1.2213, lr_0 = 4.3220e-04
Loss = 1.2019e-01, PNorm = 76.2147, GNorm = 0.5902, lr_0 = 4.3190e-04
Loss = 1.1211e-01, PNorm = 76.2239, GNorm = 0.7000, lr_0 = 4.3161e-04
Loss = 1.1262e-01, PNorm = 76.2330, GNorm = 0.5854, lr_0 = 4.3131e-04
Loss = 1.0164e-01, PNorm = 76.2436, GNorm = 0.5758, lr_0 = 4.3102e-04
Loss = 1.3316e-01, PNorm = 76.2504, GNorm = 0.6866, lr_0 = 4.3072e-04
Loss = 1.1477e-01, PNorm = 76.2622, GNorm = 0.7466, lr_0 = 4.3043e-04
Loss = 1.2445e-01, PNorm = 76.2763, GNorm = 0.6152, lr_0 = 4.3013e-04
Loss = 1.2430e-01, PNorm = 76.2870, GNorm = 0.6761, lr_0 = 4.2984e-04
Loss = 1.2533e-01, PNorm = 76.2989, GNorm = 0.8858, lr_0 = 4.2954e-04
Loss = 1.3662e-01, PNorm = 76.3085, GNorm = 1.8720, lr_0 = 4.2925e-04
Loss = 1.0683e-01, PNorm = 76.3190, GNorm = 0.5304, lr_0 = 4.2895e-04
Loss = 1.2084e-01, PNorm = 76.3337, GNorm = 1.0003, lr_0 = 4.2866e-04
Loss = 1.2098e-01, PNorm = 76.3421, GNorm = 0.7053, lr_0 = 4.2837e-04
Loss = 1.0903e-01, PNorm = 76.3490, GNorm = 0.7841, lr_0 = 4.2807e-04
Loss = 1.2363e-01, PNorm = 76.3606, GNorm = 0.7103, lr_0 = 4.2778e-04
Loss = 1.0483e-01, PNorm = 76.3717, GNorm = 0.9013, lr_0 = 4.2749e-04
Loss = 1.0791e-01, PNorm = 76.3811, GNorm = 0.4765, lr_0 = 4.2719e-04
Loss = 1.1681e-01, PNorm = 76.3924, GNorm = 0.4592, lr_0 = 4.2690e-04
Loss = 1.4518e-01, PNorm = 76.3991, GNorm = 0.7088, lr_0 = 4.2661e-04
Loss = 1.1761e-01, PNorm = 76.4174, GNorm = 1.0149, lr_0 = 4.2632e-04
Loss = 1.1570e-01, PNorm = 76.4269, GNorm = 0.6750, lr_0 = 4.2602e-04
Loss = 1.2456e-01, PNorm = 76.4361, GNorm = 1.1824, lr_0 = 4.2573e-04
Loss = 1.0828e-01, PNorm = 76.4412, GNorm = 0.3998, lr_0 = 4.2544e-04
Loss = 1.3503e-01, PNorm = 76.4548, GNorm = 0.6804, lr_0 = 4.2515e-04
Loss = 1.1914e-01, PNorm = 76.4644, GNorm = 0.4758, lr_0 = 4.2486e-04
Loss = 1.2044e-01, PNorm = 76.4753, GNorm = 0.5704, lr_0 = 4.2457e-04
Loss = 1.1761e-01, PNorm = 76.4926, GNorm = 1.0672, lr_0 = 4.2428e-04
Loss = 1.2731e-01, PNorm = 76.5060, GNorm = 0.7431, lr_0 = 4.2399e-04
Loss = 1.2066e-01, PNorm = 76.5189, GNorm = 0.5248, lr_0 = 4.2370e-04
Loss = 1.3705e-01, PNorm = 76.5326, GNorm = 0.5317, lr_0 = 4.2340e-04
Loss = 1.1830e-01, PNorm = 76.5444, GNorm = 0.6377, lr_0 = 4.2311e-04
Loss = 1.3075e-01, PNorm = 76.5588, GNorm = 0.7946, lr_0 = 4.2283e-04
Loss = 1.3314e-01, PNorm = 76.5768, GNorm = 0.7790, lr_0 = 4.2254e-04
Loss = 1.2212e-01, PNorm = 76.5856, GNorm = 0.6224, lr_0 = 4.2225e-04
Loss = 1.2333e-01, PNorm = 76.5932, GNorm = 0.8448, lr_0 = 4.2196e-04
Loss = 1.4259e-01, PNorm = 76.6028, GNorm = 0.6431, lr_0 = 4.2167e-04
Loss = 1.2302e-01, PNorm = 76.6179, GNorm = 1.2058, lr_0 = 4.2138e-04
Loss = 1.1972e-01, PNorm = 76.6342, GNorm = 0.5806, lr_0 = 4.2109e-04
Loss = 1.1613e-01, PNorm = 76.6424, GNorm = 0.8717, lr_0 = 4.2080e-04
Loss = 1.1795e-01, PNorm = 76.6499, GNorm = 0.7367, lr_0 = 4.2051e-04
Loss = 1.1764e-01, PNorm = 76.6610, GNorm = 0.5144, lr_0 = 4.2023e-04
Loss = 1.3680e-01, PNorm = 76.6765, GNorm = 0.7039, lr_0 = 4.1994e-04
Loss = 1.2937e-01, PNorm = 76.6921, GNorm = 0.7930, lr_0 = 4.1965e-04
Loss = 1.2694e-01, PNorm = 76.7078, GNorm = 0.5862, lr_0 = 4.1936e-04
Loss = 1.1968e-01, PNorm = 76.7180, GNorm = 0.6448, lr_0 = 4.1907e-04
Loss = 1.1201e-01, PNorm = 76.7273, GNorm = 0.9500, lr_0 = 4.1879e-04
Loss = 1.3834e-01, PNorm = 76.7367, GNorm = 0.6157, lr_0 = 4.1850e-04
Loss = 1.2871e-01, PNorm = 76.7487, GNorm = 0.8738, lr_0 = 4.1821e-04
Loss = 1.1338e-01, PNorm = 76.7604, GNorm = 0.7614, lr_0 = 4.1793e-04
Loss = 1.2949e-01, PNorm = 76.7681, GNorm = 0.5424, lr_0 = 4.1764e-04
Loss = 1.2293e-01, PNorm = 76.7748, GNorm = 0.9658, lr_0 = 4.1736e-04
Loss = 1.3193e-01, PNorm = 76.7835, GNorm = 0.5675, lr_0 = 4.1707e-04
Loss = 1.2163e-01, PNorm = 76.7946, GNorm = 0.5723, lr_0 = 4.1678e-04
Loss = 1.3189e-01, PNorm = 76.8047, GNorm = 0.5695, lr_0 = 4.1650e-04
Loss = 1.0877e-01, PNorm = 76.8166, GNorm = 0.9219, lr_0 = 4.1621e-04
Loss = 1.2700e-01, PNorm = 76.8264, GNorm = 0.6403, lr_0 = 4.1593e-04
Loss = 1.3931e-01, PNorm = 76.8364, GNorm = 0.9026, lr_0 = 4.1564e-04
Loss = 1.1121e-01, PNorm = 76.8418, GNorm = 0.4209, lr_0 = 4.1536e-04
Loss = 1.1999e-01, PNorm = 76.8500, GNorm = 0.4920, lr_0 = 4.1507e-04
Loss = 1.5314e-01, PNorm = 76.8649, GNorm = 0.5664, lr_0 = 4.1479e-04
Loss = 1.1980e-01, PNorm = 76.8790, GNorm = 0.6295, lr_0 = 4.1450e-04
Loss = 1.3609e-01, PNorm = 76.8933, GNorm = 0.6961, lr_0 = 4.1422e-04
Loss = 1.0507e-01, PNorm = 76.9044, GNorm = 0.5538, lr_0 = 4.1394e-04
Loss = 1.2808e-01, PNorm = 76.9124, GNorm = 0.5004, lr_0 = 4.1365e-04
Loss = 1.2602e-01, PNorm = 76.9216, GNorm = 0.7422, lr_0 = 4.1337e-04
Loss = 1.2622e-01, PNorm = 76.9310, GNorm = 0.6647, lr_0 = 4.1309e-04
Loss = 1.2248e-01, PNorm = 76.9405, GNorm = 0.9482, lr_0 = 4.1280e-04
Loss = 1.3030e-01, PNorm = 76.9502, GNorm = 0.7768, lr_0 = 4.1252e-04
Loss = 1.2445e-01, PNorm = 76.9654, GNorm = 1.0477, lr_0 = 4.1224e-04
Loss = 1.2126e-01, PNorm = 76.9767, GNorm = 0.5556, lr_0 = 4.1196e-04
Loss = 1.2201e-01, PNorm = 76.9892, GNorm = 0.5646, lr_0 = 4.1167e-04
Loss = 1.1874e-01, PNorm = 76.9997, GNorm = 0.5207, lr_0 = 4.1139e-04
Loss = 1.3291e-01, PNorm = 77.0071, GNorm = 1.0245, lr_0 = 4.1111e-04
Loss = 1.2036e-01, PNorm = 77.0167, GNorm = 0.5329, lr_0 = 4.1083e-04
Loss = 1.3064e-01, PNorm = 77.0260, GNorm = 0.7935, lr_0 = 4.1055e-04
Loss = 1.2373e-01, PNorm = 77.0331, GNorm = 0.7179, lr_0 = 4.1027e-04
Loss = 1.3412e-01, PNorm = 77.0440, GNorm = 0.8633, lr_0 = 4.0998e-04
Loss = 1.1894e-01, PNorm = 77.0547, GNorm = 1.2605, lr_0 = 4.0970e-04
Loss = 9.7471e-02, PNorm = 77.0678, GNorm = 0.5344, lr_0 = 4.0942e-04
Loss = 1.3974e-01, PNorm = 77.0750, GNorm = 0.8069, lr_0 = 4.0914e-04
Loss = 1.2331e-01, PNorm = 77.0876, GNorm = 0.7159, lr_0 = 4.0886e-04
Loss = 1.1734e-01, PNorm = 77.1002, GNorm = 0.6101, lr_0 = 4.0858e-04
Loss = 1.1422e-01, PNorm = 77.1146, GNorm = 0.6083, lr_0 = 4.0830e-04
Loss = 1.3395e-01, PNorm = 77.1239, GNorm = 0.5470, lr_0 = 4.0802e-04
Loss = 1.3013e-01, PNorm = 77.1343, GNorm = 0.6038, lr_0 = 4.0774e-04
Loss = 1.1969e-01, PNorm = 77.1416, GNorm = 0.7504, lr_0 = 4.0746e-04
Loss = 1.1159e-01, PNorm = 77.1530, GNorm = 0.8806, lr_0 = 4.0718e-04
Loss = 1.2201e-01, PNorm = 77.1614, GNorm = 0.5585, lr_0 = 4.0691e-04
Loss = 1.2033e-01, PNorm = 77.1706, GNorm = 0.6537, lr_0 = 4.0663e-04
Loss = 1.1478e-01, PNorm = 77.1791, GNorm = 0.9157, lr_0 = 4.0635e-04
Loss = 1.1034e-01, PNorm = 77.1885, GNorm = 1.2907, lr_0 = 4.0607e-04
Loss = 1.2239e-01, PNorm = 77.1975, GNorm = 0.7786, lr_0 = 4.0579e-04
Loss = 1.2544e-01, PNorm = 77.2108, GNorm = 0.4914, lr_0 = 4.0551e-04
Loss = 1.3246e-01, PNorm = 77.2173, GNorm = 0.6562, lr_0 = 4.0524e-04
Loss = 1.3566e-01, PNorm = 77.2243, GNorm = 0.8095, lr_0 = 4.0496e-04
Loss = 1.2416e-01, PNorm = 77.2284, GNorm = 0.5259, lr_0 = 4.0468e-04
Validation mae = 0.235608
Epoch 13
Loss = 1.1474e-01, PNorm = 77.2382, GNorm = 0.6528, lr_0 = 4.0440e-04
Loss = 1.3123e-01, PNorm = 77.2495, GNorm = 0.7412, lr_0 = 4.0413e-04
Loss = 1.2087e-01, PNorm = 77.2639, GNorm = 1.0497, lr_0 = 4.0385e-04
Loss = 1.3073e-01, PNorm = 77.2775, GNorm = 0.6908, lr_0 = 4.0357e-04
Loss = 1.4792e-01, PNorm = 77.2892, GNorm = 0.5961, lr_0 = 4.0330e-04
Loss = 1.0449e-01, PNorm = 77.3025, GNorm = 0.5543, lr_0 = 4.0302e-04
Loss = 1.1376e-01, PNorm = 77.3149, GNorm = 0.6372, lr_0 = 4.0274e-04
Loss = 1.0883e-01, PNorm = 77.3254, GNorm = 0.8520, lr_0 = 4.0247e-04
Loss = 1.3055e-01, PNorm = 77.3387, GNorm = 0.5995, lr_0 = 4.0219e-04
Loss = 1.1235e-01, PNorm = 77.3543, GNorm = 1.4911, lr_0 = 4.0192e-04
Loss = 1.1178e-01, PNorm = 77.3604, GNorm = 0.6105, lr_0 = 4.0164e-04
Loss = 1.1109e-01, PNorm = 77.3655, GNorm = 0.5278, lr_0 = 4.0137e-04
Loss = 1.2230e-01, PNorm = 77.3720, GNorm = 0.5965, lr_0 = 4.0109e-04
Loss = 1.2237e-01, PNorm = 77.3806, GNorm = 0.7530, lr_0 = 4.0082e-04
Loss = 1.2660e-01, PNorm = 77.3909, GNorm = 0.8787, lr_0 = 4.0054e-04
Loss = 1.1664e-01, PNorm = 77.3984, GNorm = 0.5302, lr_0 = 4.0027e-04
Loss = 9.5053e-02, PNorm = 77.4021, GNorm = 0.5946, lr_0 = 3.9999e-04
Loss = 1.0233e-01, PNorm = 77.4125, GNorm = 0.7698, lr_0 = 3.9972e-04
Loss = 1.1559e-01, PNorm = 77.4213, GNorm = 0.6325, lr_0 = 3.9945e-04
Loss = 1.3420e-01, PNorm = 77.4313, GNorm = 0.8357, lr_0 = 3.9917e-04
Loss = 1.1610e-01, PNorm = 77.4451, GNorm = 1.2265, lr_0 = 3.9890e-04
Loss = 1.0665e-01, PNorm = 77.4557, GNorm = 0.7939, lr_0 = 3.9863e-04
Loss = 1.2837e-01, PNorm = 77.4655, GNorm = 0.7349, lr_0 = 3.9835e-04
Loss = 1.0981e-01, PNorm = 77.4785, GNorm = 0.5666, lr_0 = 3.9808e-04
Loss = 1.1592e-01, PNorm = 77.4889, GNorm = 0.6744, lr_0 = 3.9781e-04
Loss = 1.0163e-01, PNorm = 77.5000, GNorm = 0.5523, lr_0 = 3.9753e-04
Loss = 1.1454e-01, PNorm = 77.5122, GNorm = 0.7539, lr_0 = 3.9726e-04
Loss = 1.1938e-01, PNorm = 77.5175, GNorm = 0.8059, lr_0 = 3.9699e-04
Loss = 1.2281e-01, PNorm = 77.5273, GNorm = 0.5795, lr_0 = 3.9672e-04
Loss = 1.1227e-01, PNorm = 77.5372, GNorm = 0.6438, lr_0 = 3.9645e-04
Loss = 1.1171e-01, PNorm = 77.5491, GNorm = 0.9598, lr_0 = 3.9617e-04
Loss = 1.1904e-01, PNorm = 77.5596, GNorm = 0.6338, lr_0 = 3.9590e-04
Loss = 1.1263e-01, PNorm = 77.5665, GNorm = 0.8483, lr_0 = 3.9563e-04
Loss = 1.0282e-01, PNorm = 77.5769, GNorm = 0.4658, lr_0 = 3.9536e-04
Loss = 1.0987e-01, PNorm = 77.5842, GNorm = 1.1394, lr_0 = 3.9509e-04
Loss = 1.1273e-01, PNorm = 77.5972, GNorm = 0.7389, lr_0 = 3.9482e-04
Loss = 1.1632e-01, PNorm = 77.6050, GNorm = 1.0537, lr_0 = 3.9455e-04
Loss = 1.1399e-01, PNorm = 77.6168, GNorm = 0.8028, lr_0 = 3.9428e-04
Loss = 1.1405e-01, PNorm = 77.6309, GNorm = 0.7373, lr_0 = 3.9401e-04
Loss = 1.2135e-01, PNorm = 77.6402, GNorm = 0.6800, lr_0 = 3.9374e-04
Loss = 1.2432e-01, PNorm = 77.6472, GNorm = 0.7320, lr_0 = 3.9347e-04
Loss = 1.2793e-01, PNorm = 77.6619, GNorm = 0.4757, lr_0 = 3.9320e-04
Loss = 1.1589e-01, PNorm = 77.6790, GNorm = 0.6745, lr_0 = 3.9293e-04
Loss = 1.1357e-01, PNorm = 77.6913, GNorm = 0.5977, lr_0 = 3.9266e-04
Loss = 1.2226e-01, PNorm = 77.7020, GNorm = 0.7698, lr_0 = 3.9239e-04
Loss = 1.0392e-01, PNorm = 77.7082, GNorm = 0.6059, lr_0 = 3.9212e-04
Loss = 1.1755e-01, PNorm = 77.7171, GNorm = 0.6822, lr_0 = 3.9185e-04
Loss = 1.3331e-01, PNorm = 77.7261, GNorm = 1.0328, lr_0 = 3.9159e-04
Loss = 1.1763e-01, PNorm = 77.7432, GNorm = 0.5592, lr_0 = 3.9132e-04
Loss = 1.1901e-01, PNorm = 77.7566, GNorm = 0.6729, lr_0 = 3.9105e-04
Loss = 1.1105e-01, PNorm = 77.7656, GNorm = 0.6943, lr_0 = 3.9078e-04
Loss = 1.0072e-01, PNorm = 77.7787, GNorm = 0.6024, lr_0 = 3.9051e-04
Loss = 1.1580e-01, PNorm = 77.7932, GNorm = 0.6281, lr_0 = 3.9025e-04
Loss = 1.0869e-01, PNorm = 77.8000, GNorm = 0.7848, lr_0 = 3.8998e-04
Loss = 1.0968e-01, PNorm = 77.8059, GNorm = 0.5692, lr_0 = 3.8971e-04
Loss = 1.1341e-01, PNorm = 77.8121, GNorm = 0.5213, lr_0 = 3.8945e-04
Loss = 1.1470e-01, PNorm = 77.8181, GNorm = 0.6062, lr_0 = 3.8918e-04
Loss = 1.1593e-01, PNorm = 77.8229, GNorm = 0.4743, lr_0 = 3.8891e-04
Loss = 1.1096e-01, PNorm = 77.8321, GNorm = 0.9303, lr_0 = 3.8865e-04
Loss = 1.1296e-01, PNorm = 77.8428, GNorm = 0.5983, lr_0 = 3.8838e-04
Loss = 1.1730e-01, PNorm = 77.8531, GNorm = 0.6446, lr_0 = 3.8811e-04
Loss = 1.3264e-01, PNorm = 77.8578, GNorm = 0.7107, lr_0 = 3.8785e-04
Loss = 1.3543e-01, PNorm = 77.8655, GNorm = 0.6497, lr_0 = 3.8758e-04
Loss = 1.2975e-01, PNorm = 77.8723, GNorm = 0.7733, lr_0 = 3.8732e-04
Loss = 1.2378e-01, PNorm = 77.8833, GNorm = 0.7010, lr_0 = 3.8705e-04
Loss = 1.1485e-01, PNorm = 77.8957, GNorm = 0.6602, lr_0 = 3.8679e-04
Loss = 1.2530e-01, PNorm = 77.9081, GNorm = 0.6545, lr_0 = 3.8652e-04
Loss = 1.2306e-01, PNorm = 77.9182, GNorm = 0.9128, lr_0 = 3.8626e-04
Loss = 1.1692e-01, PNorm = 77.9283, GNorm = 0.5983, lr_0 = 3.8599e-04
Loss = 1.0253e-01, PNorm = 77.9339, GNorm = 0.6891, lr_0 = 3.8573e-04
Loss = 1.2922e-01, PNorm = 77.9426, GNorm = 0.7797, lr_0 = 3.8546e-04
Loss = 1.0668e-01, PNorm = 77.9556, GNorm = 0.5732, lr_0 = 3.8520e-04
Loss = 1.1286e-01, PNorm = 77.9637, GNorm = 1.2859, lr_0 = 3.8493e-04
Loss = 1.2875e-01, PNorm = 77.9700, GNorm = 0.7895, lr_0 = 3.8467e-04
Loss = 1.0701e-01, PNorm = 77.9819, GNorm = 0.6446, lr_0 = 3.8441e-04
Loss = 1.1922e-01, PNorm = 77.9895, GNorm = 0.5512, lr_0 = 3.8414e-04
Loss = 1.1593e-01, PNorm = 77.9993, GNorm = 0.5815, lr_0 = 3.8388e-04
Loss = 1.2476e-01, PNorm = 78.0087, GNorm = 0.6932, lr_0 = 3.8362e-04
Loss = 1.0877e-01, PNorm = 78.0175, GNorm = 0.5556, lr_0 = 3.8336e-04
Loss = 1.1603e-01, PNorm = 78.0265, GNorm = 0.4871, lr_0 = 3.8309e-04
Loss = 1.1547e-01, PNorm = 78.0395, GNorm = 0.9046, lr_0 = 3.8283e-04
Loss = 1.1676e-01, PNorm = 78.0488, GNorm = 0.8555, lr_0 = 3.8257e-04
Loss = 1.2125e-01, PNorm = 78.0579, GNorm = 0.7737, lr_0 = 3.8231e-04
Loss = 1.1429e-01, PNorm = 78.0660, GNorm = 0.6133, lr_0 = 3.8204e-04
Loss = 1.2174e-01, PNorm = 78.0732, GNorm = 0.8134, lr_0 = 3.8178e-04
Loss = 1.1567e-01, PNorm = 78.0841, GNorm = 0.6185, lr_0 = 3.8152e-04
Loss = 1.1708e-01, PNorm = 78.0949, GNorm = 0.6834, lr_0 = 3.8126e-04
Loss = 1.2539e-01, PNorm = 78.1054, GNorm = 0.8604, lr_0 = 3.8100e-04
Loss = 1.2812e-01, PNorm = 78.1180, GNorm = 1.3323, lr_0 = 3.8074e-04
Loss = 1.2305e-01, PNorm = 78.1257, GNorm = 0.6605, lr_0 = 3.8048e-04
Loss = 1.1759e-01, PNorm = 78.1345, GNorm = 0.4042, lr_0 = 3.8022e-04
Loss = 1.3191e-01, PNorm = 78.1412, GNorm = 1.1067, lr_0 = 3.7995e-04
Loss = 1.1292e-01, PNorm = 78.1494, GNorm = 0.7807, lr_0 = 3.7969e-04
Loss = 1.1831e-01, PNorm = 78.1600, GNorm = 0.9135, lr_0 = 3.7943e-04
Loss = 1.2609e-01, PNorm = 78.1726, GNorm = 0.7214, lr_0 = 3.7917e-04
Loss = 1.0901e-01, PNorm = 78.1851, GNorm = 0.6265, lr_0 = 3.7891e-04
Loss = 1.2802e-01, PNorm = 78.1970, GNorm = 0.7703, lr_0 = 3.7866e-04
Loss = 1.0948e-01, PNorm = 78.2029, GNorm = 0.5477, lr_0 = 3.7840e-04
Loss = 1.1208e-01, PNorm = 78.2127, GNorm = 0.7945, lr_0 = 3.7814e-04
Loss = 1.1795e-01, PNorm = 78.2215, GNorm = 1.1901, lr_0 = 3.7788e-04
Loss = 1.1950e-01, PNorm = 78.2303, GNorm = 0.5549, lr_0 = 3.7762e-04
Loss = 1.1204e-01, PNorm = 78.2386, GNorm = 0.6580, lr_0 = 3.7736e-04
Loss = 1.2439e-01, PNorm = 78.2448, GNorm = 0.6723, lr_0 = 3.7710e-04
Loss = 1.3776e-01, PNorm = 78.2488, GNorm = 0.8432, lr_0 = 3.7684e-04
Loss = 1.1297e-01, PNorm = 78.2505, GNorm = 0.7498, lr_0 = 3.7659e-04
Loss = 1.2539e-01, PNorm = 78.2592, GNorm = 0.7240, lr_0 = 3.7633e-04
Loss = 1.2897e-01, PNorm = 78.2718, GNorm = 0.6700, lr_0 = 3.7607e-04
Loss = 1.1037e-01, PNorm = 78.2828, GNorm = 0.6746, lr_0 = 3.7581e-04
Loss = 1.1867e-01, PNorm = 78.2875, GNorm = 0.5783, lr_0 = 3.7555e-04
Loss = 1.2902e-01, PNorm = 78.2934, GNorm = 0.7616, lr_0 = 3.7530e-04
Loss = 1.0370e-01, PNorm = 78.3033, GNorm = 0.8833, lr_0 = 3.7504e-04
Loss = 1.1370e-01, PNorm = 78.3104, GNorm = 0.7131, lr_0 = 3.7478e-04
Loss = 1.1804e-01, PNorm = 78.3199, GNorm = 0.7017, lr_0 = 3.7453e-04
Loss = 1.1875e-01, PNorm = 78.3247, GNorm = 0.8984, lr_0 = 3.7427e-04
Loss = 1.2315e-01, PNorm = 78.3308, GNorm = 0.6765, lr_0 = 3.7401e-04
Loss = 1.1397e-01, PNorm = 78.3386, GNorm = 0.8911, lr_0 = 3.7376e-04
Loss = 1.1890e-01, PNorm = 78.3466, GNorm = 0.9156, lr_0 = 3.7350e-04
Loss = 1.1187e-01, PNorm = 78.3571, GNorm = 0.4446, lr_0 = 3.7325e-04
Loss = 1.1484e-01, PNorm = 78.3701, GNorm = 0.6696, lr_0 = 3.7299e-04
Loss = 1.0245e-01, PNorm = 78.3841, GNorm = 0.5702, lr_0 = 3.7273e-04
Validation mae = 0.232471
Epoch 14
Loss = 1.0663e-01, PNorm = 78.3912, GNorm = 0.7061, lr_0 = 3.7248e-04
Loss = 1.2277e-01, PNorm = 78.3997, GNorm = 1.2191, lr_0 = 3.7222e-04
Loss = 1.0156e-01, PNorm = 78.4059, GNorm = 0.7194, lr_0 = 3.7197e-04
Loss = 9.3875e-02, PNorm = 78.4165, GNorm = 0.8532, lr_0 = 3.7171e-04
Loss = 1.1624e-01, PNorm = 78.4251, GNorm = 0.9750, lr_0 = 3.7146e-04
Loss = 1.1559e-01, PNorm = 78.4350, GNorm = 0.8375, lr_0 = 3.7120e-04
Loss = 1.0736e-01, PNorm = 78.4435, GNorm = 0.8463, lr_0 = 3.7095e-04
Loss = 1.0323e-01, PNorm = 78.4483, GNorm = 0.7224, lr_0 = 3.7070e-04
Loss = 9.7419e-02, PNorm = 78.4535, GNorm = 0.5423, lr_0 = 3.7044e-04
Loss = 1.0152e-01, PNorm = 78.4644, GNorm = 1.0052, lr_0 = 3.7019e-04
Loss = 1.1281e-01, PNorm = 78.4764, GNorm = 0.4783, lr_0 = 3.6993e-04
Loss = 1.2343e-01, PNorm = 78.4919, GNorm = 0.8930, lr_0 = 3.6968e-04
Loss = 1.0851e-01, PNorm = 78.5081, GNorm = 0.7523, lr_0 = 3.6943e-04
Loss = 1.0465e-01, PNorm = 78.5214, GNorm = 0.8841, lr_0 = 3.6917e-04
Loss = 1.1669e-01, PNorm = 78.5285, GNorm = 0.9837, lr_0 = 3.6892e-04
Loss = 1.0517e-01, PNorm = 78.5328, GNorm = 0.7476, lr_0 = 3.6867e-04
Loss = 1.0799e-01, PNorm = 78.5351, GNorm = 0.4868, lr_0 = 3.6842e-04
Loss = 1.0113e-01, PNorm = 78.5399, GNorm = 0.7232, lr_0 = 3.6816e-04
Loss = 1.1390e-01, PNorm = 78.5461, GNorm = 0.5973, lr_0 = 3.6791e-04
Loss = 1.0485e-01, PNorm = 78.5563, GNorm = 0.6788, lr_0 = 3.6766e-04
Loss = 1.0272e-01, PNorm = 78.5660, GNorm = 0.8476, lr_0 = 3.6741e-04
Loss = 1.0603e-01, PNorm = 78.5834, GNorm = 0.7095, lr_0 = 3.6716e-04
Loss = 9.8488e-02, PNorm = 78.5941, GNorm = 0.5872, lr_0 = 3.6690e-04
Loss = 1.2070e-01, PNorm = 78.6063, GNorm = 0.5762, lr_0 = 3.6665e-04
Loss = 1.1858e-01, PNorm = 78.6156, GNorm = 0.5730, lr_0 = 3.6640e-04
Loss = 9.8078e-02, PNorm = 78.6215, GNorm = 0.4658, lr_0 = 3.6615e-04
Loss = 1.0969e-01, PNorm = 78.6272, GNorm = 0.4510, lr_0 = 3.6590e-04
Loss = 1.1491e-01, PNorm = 78.6327, GNorm = 0.8881, lr_0 = 3.6565e-04
Loss = 1.0438e-01, PNorm = 78.6436, GNorm = 0.7111, lr_0 = 3.6540e-04
Loss = 1.1072e-01, PNorm = 78.6510, GNorm = 0.7997, lr_0 = 3.6515e-04
Loss = 1.0317e-01, PNorm = 78.6548, GNorm = 0.5570, lr_0 = 3.6490e-04
Loss = 1.1620e-01, PNorm = 78.6599, GNorm = 0.7041, lr_0 = 3.6465e-04
Loss = 1.0033e-01, PNorm = 78.6679, GNorm = 0.5375, lr_0 = 3.6440e-04
Loss = 1.2108e-01, PNorm = 78.6748, GNorm = 0.6278, lr_0 = 3.6415e-04
Loss = 1.2331e-01, PNorm = 78.6922, GNorm = 1.1622, lr_0 = 3.6390e-04
Loss = 1.2774e-01, PNorm = 78.7029, GNorm = 0.6846, lr_0 = 3.6365e-04
Loss = 1.1681e-01, PNorm = 78.7179, GNorm = 0.5356, lr_0 = 3.6340e-04
Loss = 1.1875e-01, PNorm = 78.7295, GNorm = 0.6290, lr_0 = 3.6315e-04
Loss = 1.1269e-01, PNorm = 78.7416, GNorm = 0.6949, lr_0 = 3.6290e-04
Loss = 1.2711e-01, PNorm = 78.7513, GNorm = 0.4503, lr_0 = 3.6266e-04
Loss = 1.2092e-01, PNorm = 78.7605, GNorm = 0.9963, lr_0 = 3.6241e-04
Loss = 1.0003e-01, PNorm = 78.7663, GNorm = 0.9768, lr_0 = 3.6216e-04
Loss = 1.0731e-01, PNorm = 78.7713, GNorm = 0.6410, lr_0 = 3.6191e-04
Loss = 1.1113e-01, PNorm = 78.7742, GNorm = 0.6748, lr_0 = 3.6166e-04
Loss = 9.1980e-02, PNorm = 78.7791, GNorm = 0.5327, lr_0 = 3.6141e-04
Loss = 1.0375e-01, PNorm = 78.7865, GNorm = 0.6056, lr_0 = 3.6117e-04
Loss = 1.1213e-01, PNorm = 78.7921, GNorm = 0.7274, lr_0 = 3.6092e-04
Loss = 1.0884e-01, PNorm = 78.8006, GNorm = 0.6208, lr_0 = 3.6067e-04
Loss = 1.0832e-01, PNorm = 78.8112, GNorm = 0.4864, lr_0 = 3.6043e-04
Loss = 1.2023e-01, PNorm = 78.8218, GNorm = 0.9217, lr_0 = 3.6018e-04
Loss = 1.1105e-01, PNorm = 78.8352, GNorm = 0.7071, lr_0 = 3.5993e-04
Loss = 1.2271e-01, PNorm = 78.8437, GNorm = 0.6354, lr_0 = 3.5969e-04
Loss = 1.0757e-01, PNorm = 78.8522, GNorm = 0.6540, lr_0 = 3.5944e-04
Loss = 1.0845e-01, PNorm = 78.8613, GNorm = 0.8246, lr_0 = 3.5919e-04
Loss = 1.0753e-01, PNorm = 78.8646, GNorm = 0.8720, lr_0 = 3.5895e-04
Loss = 1.0287e-01, PNorm = 78.8734, GNorm = 0.6724, lr_0 = 3.5870e-04
Loss = 1.0554e-01, PNorm = 78.8806, GNorm = 0.5954, lr_0 = 3.5845e-04
Loss = 1.0207e-01, PNorm = 78.8856, GNorm = 0.5630, lr_0 = 3.5821e-04
Loss = 1.1133e-01, PNorm = 78.8964, GNorm = 0.7123, lr_0 = 3.5796e-04
Loss = 1.0364e-01, PNorm = 78.9050, GNorm = 0.5450, lr_0 = 3.5772e-04
Loss = 1.3181e-01, PNorm = 78.9095, GNorm = 0.8244, lr_0 = 3.5747e-04
Loss = 1.1035e-01, PNorm = 78.9221, GNorm = 0.6638, lr_0 = 3.5723e-04
Loss = 1.1111e-01, PNorm = 78.9324, GNorm = 1.1203, lr_0 = 3.5698e-04
Loss = 1.0271e-01, PNorm = 78.9435, GNorm = 0.6731, lr_0 = 3.5674e-04
Loss = 1.0723e-01, PNorm = 78.9524, GNorm = 0.8052, lr_0 = 3.5650e-04
Loss = 1.1358e-01, PNorm = 78.9640, GNorm = 0.6059, lr_0 = 3.5625e-04
Loss = 1.2040e-01, PNorm = 78.9745, GNorm = 0.7779, lr_0 = 3.5601e-04
Loss = 1.0900e-01, PNorm = 78.9799, GNorm = 0.6087, lr_0 = 3.5576e-04
Loss = 9.3951e-02, PNorm = 78.9847, GNorm = 0.5502, lr_0 = 3.5552e-04
Loss = 1.0738e-01, PNorm = 78.9924, GNorm = 0.5417, lr_0 = 3.5528e-04
Loss = 9.7591e-02, PNorm = 78.9998, GNorm = 0.5368, lr_0 = 3.5503e-04
Loss = 1.0928e-01, PNorm = 79.0058, GNorm = 0.5163, lr_0 = 3.5479e-04
Loss = 1.2674e-01, PNorm = 79.0153, GNorm = 0.6510, lr_0 = 3.5455e-04
Loss = 1.1524e-01, PNorm = 79.0266, GNorm = 0.6775, lr_0 = 3.5430e-04
Loss = 1.0634e-01, PNorm = 79.0367, GNorm = 0.7264, lr_0 = 3.5406e-04
Loss = 1.0845e-01, PNorm = 79.0443, GNorm = 0.6733, lr_0 = 3.5382e-04
Loss = 1.1337e-01, PNorm = 79.0472, GNorm = 0.6843, lr_0 = 3.5358e-04
Loss = 1.0064e-01, PNorm = 79.0525, GNorm = 0.8151, lr_0 = 3.5333e-04
Loss = 9.4017e-02, PNorm = 79.0598, GNorm = 0.5705, lr_0 = 3.5309e-04
Loss = 1.0107e-01, PNorm = 79.0656, GNorm = 0.4792, lr_0 = 3.5285e-04
Loss = 1.0470e-01, PNorm = 79.0702, GNorm = 0.6407, lr_0 = 3.5261e-04
Loss = 1.3020e-01, PNorm = 79.0779, GNorm = 0.7903, lr_0 = 3.5237e-04
Loss = 1.1276e-01, PNorm = 79.0853, GNorm = 0.7478, lr_0 = 3.5212e-04
Loss = 1.1236e-01, PNorm = 79.0901, GNorm = 0.7490, lr_0 = 3.5188e-04
Loss = 1.1469e-01, PNorm = 79.0987, GNorm = 0.6135, lr_0 = 3.5164e-04
Loss = 1.0560e-01, PNorm = 79.1083, GNorm = 0.5520, lr_0 = 3.5140e-04
Loss = 1.2022e-01, PNorm = 79.1189, GNorm = 0.4802, lr_0 = 3.5116e-04
Loss = 1.4205e-01, PNorm = 79.1261, GNorm = 0.8136, lr_0 = 3.5092e-04
Loss = 1.2529e-01, PNorm = 79.1320, GNorm = 1.0994, lr_0 = 3.5068e-04
Loss = 1.2395e-01, PNorm = 79.1362, GNorm = 0.7177, lr_0 = 3.5044e-04
Loss = 1.3173e-01, PNorm = 79.1491, GNorm = 1.2959, lr_0 = 3.5020e-04
Loss = 1.3634e-01, PNorm = 79.1605, GNorm = 1.0025, lr_0 = 3.4996e-04
Loss = 1.2314e-01, PNorm = 79.1639, GNorm = 0.8734, lr_0 = 3.4972e-04
Loss = 1.1172e-01, PNorm = 79.1697, GNorm = 0.8728, lr_0 = 3.4948e-04
Loss = 1.1317e-01, PNorm = 79.1796, GNorm = 0.7269, lr_0 = 3.4924e-04
Loss = 1.0742e-01, PNorm = 79.1899, GNorm = 0.9331, lr_0 = 3.4900e-04
Loss = 1.1157e-01, PNorm = 79.1995, GNorm = 0.4688, lr_0 = 3.4876e-04
Loss = 1.0696e-01, PNorm = 79.2111, GNorm = 1.0090, lr_0 = 3.4852e-04
Loss = 1.0531e-01, PNorm = 79.2174, GNorm = 0.7328, lr_0 = 3.4828e-04
Loss = 1.2517e-01, PNorm = 79.2263, GNorm = 1.0372, lr_0 = 3.4805e-04
Loss = 1.2949e-01, PNorm = 79.2389, GNorm = 0.6492, lr_0 = 3.4781e-04
Loss = 1.0612e-01, PNorm = 79.2512, GNorm = 0.6598, lr_0 = 3.4757e-04
Loss = 1.2581e-01, PNorm = 79.2596, GNorm = 0.8463, lr_0 = 3.4733e-04
Loss = 1.2350e-01, PNorm = 79.2702, GNorm = 0.7011, lr_0 = 3.4709e-04
Loss = 1.1224e-01, PNorm = 79.2798, GNorm = 0.8079, lr_0 = 3.4686e-04
Loss = 1.0770e-01, PNorm = 79.2874, GNorm = 0.6381, lr_0 = 3.4662e-04
Loss = 1.0986e-01, PNorm = 79.2984, GNorm = 0.9777, lr_0 = 3.4638e-04
Loss = 1.0389e-01, PNorm = 79.3028, GNorm = 0.6243, lr_0 = 3.4614e-04
Loss = 1.0912e-01, PNorm = 79.3067, GNorm = 0.6150, lr_0 = 3.4591e-04
Loss = 9.8933e-02, PNorm = 79.3125, GNorm = 0.6244, lr_0 = 3.4567e-04
Loss = 1.2428e-01, PNorm = 79.3151, GNorm = 0.7143, lr_0 = 3.4543e-04
Loss = 1.2091e-01, PNorm = 79.3196, GNorm = 0.7794, lr_0 = 3.4520e-04
Loss = 1.1130e-01, PNorm = 79.3293, GNorm = 0.6510, lr_0 = 3.4496e-04
Loss = 1.1038e-01, PNorm = 79.3392, GNorm = 0.7149, lr_0 = 3.4472e-04
Loss = 1.3241e-01, PNorm = 79.3499, GNorm = 1.1324, lr_0 = 3.4449e-04
Loss = 1.1674e-01, PNorm = 79.3559, GNorm = 0.5352, lr_0 = 3.4425e-04
Loss = 1.2209e-01, PNorm = 79.3632, GNorm = 0.5955, lr_0 = 3.4402e-04
Loss = 1.2159e-01, PNorm = 79.3699, GNorm = 0.6408, lr_0 = 3.4378e-04
Loss = 1.0417e-01, PNorm = 79.3764, GNorm = 0.7166, lr_0 = 3.4354e-04
Loss = 1.0478e-01, PNorm = 79.3846, GNorm = 0.6570, lr_0 = 3.4331e-04
Validation mae = 0.232398
Epoch 15
Loss = 1.0388e-01, PNorm = 79.3931, GNorm = 0.5979, lr_0 = 3.4307e-04
Loss = 1.0430e-01, PNorm = 79.4040, GNorm = 0.6318, lr_0 = 3.4284e-04
Loss = 1.0816e-01, PNorm = 79.4104, GNorm = 0.7547, lr_0 = 3.4260e-04
Loss = 1.0371e-01, PNorm = 79.4194, GNorm = 0.4894, lr_0 = 3.4237e-04
Loss = 1.0767e-01, PNorm = 79.4304, GNorm = 0.6997, lr_0 = 3.4213e-04
Loss = 1.0470e-01, PNorm = 79.4371, GNorm = 0.5595, lr_0 = 3.4190e-04
Loss = 1.0329e-01, PNorm = 79.4469, GNorm = 0.7936, lr_0 = 3.4167e-04
Loss = 1.1501e-01, PNorm = 79.4542, GNorm = 0.8909, lr_0 = 3.4143e-04
Loss = 1.0560e-01, PNorm = 79.4629, GNorm = 0.9095, lr_0 = 3.4120e-04
Loss = 1.0932e-01, PNorm = 79.4741, GNorm = 0.7472, lr_0 = 3.4096e-04
Loss = 1.0955e-01, PNorm = 79.4868, GNorm = 0.6547, lr_0 = 3.4073e-04
Loss = 1.1454e-01, PNorm = 79.4973, GNorm = 0.7685, lr_0 = 3.4050e-04
Loss = 1.0351e-01, PNorm = 79.5012, GNorm = 0.8804, lr_0 = 3.4026e-04
Loss = 1.1089e-01, PNorm = 79.5078, GNorm = 0.6102, lr_0 = 3.4003e-04
Loss = 1.0315e-01, PNorm = 79.5159, GNorm = 0.6034, lr_0 = 3.3980e-04
Loss = 1.0521e-01, PNorm = 79.5260, GNorm = 0.7270, lr_0 = 3.3956e-04
Loss = 9.8709e-02, PNorm = 79.5320, GNorm = 0.5385, lr_0 = 3.3933e-04
Loss = 1.1087e-01, PNorm = 79.5420, GNorm = 0.6942, lr_0 = 3.3910e-04
Loss = 9.3831e-02, PNorm = 79.5483, GNorm = 0.7247, lr_0 = 3.3887e-04
Loss = 1.0558e-01, PNorm = 79.5524, GNorm = 0.7584, lr_0 = 3.3864e-04
Loss = 1.0897e-01, PNorm = 79.5610, GNorm = 0.7139, lr_0 = 3.3840e-04
Loss = 1.0409e-01, PNorm = 79.5670, GNorm = 0.5777, lr_0 = 3.3817e-04
Loss = 1.1320e-01, PNorm = 79.5743, GNorm = 1.0603, lr_0 = 3.3794e-04
Loss = 1.0128e-01, PNorm = 79.5838, GNorm = 0.7033, lr_0 = 3.3771e-04
Loss = 1.2730e-01, PNorm = 79.5885, GNorm = 0.9252, lr_0 = 3.3748e-04
Loss = 1.0247e-01, PNorm = 79.5983, GNorm = 0.7467, lr_0 = 3.3725e-04
Loss = 1.0288e-01, PNorm = 79.6031, GNorm = 0.7670, lr_0 = 3.3701e-04
Loss = 1.1849e-01, PNorm = 79.6085, GNorm = 0.8400, lr_0 = 3.3678e-04
Loss = 9.4369e-02, PNorm = 79.6181, GNorm = 0.5693, lr_0 = 3.3655e-04
Loss = 9.5609e-02, PNorm = 79.6275, GNorm = 0.7850, lr_0 = 3.3632e-04
Loss = 1.2375e-01, PNorm = 79.6335, GNorm = 0.7330, lr_0 = 3.3609e-04
Loss = 1.0528e-01, PNorm = 79.6435, GNorm = 0.6649, lr_0 = 3.3586e-04
Loss = 1.1505e-01, PNorm = 79.6502, GNorm = 0.6842, lr_0 = 3.3563e-04
Loss = 1.2672e-01, PNorm = 79.6593, GNorm = 0.5197, lr_0 = 3.3540e-04
Loss = 1.1181e-01, PNorm = 79.6649, GNorm = 0.7318, lr_0 = 3.3517e-04
Loss = 1.0082e-01, PNorm = 79.6704, GNorm = 0.6054, lr_0 = 3.3494e-04
Loss = 1.1537e-01, PNorm = 79.6767, GNorm = 0.7029, lr_0 = 3.3471e-04
Loss = 1.0675e-01, PNorm = 79.6861, GNorm = 0.9070, lr_0 = 3.3448e-04
Loss = 1.0387e-01, PNorm = 79.6963, GNorm = 0.8145, lr_0 = 3.3425e-04
Loss = 1.1008e-01, PNorm = 79.7034, GNorm = 0.5717, lr_0 = 3.3403e-04
Loss = 1.1408e-01, PNorm = 79.7086, GNorm = 0.5937, lr_0 = 3.3380e-04
Loss = 1.1001e-01, PNorm = 79.7169, GNorm = 0.8568, lr_0 = 3.3357e-04
Loss = 1.1673e-01, PNorm = 79.7246, GNorm = 0.7868, lr_0 = 3.3334e-04
Loss = 9.8212e-02, PNorm = 79.7327, GNorm = 0.6455, lr_0 = 3.3311e-04
Loss = 1.1687e-01, PNorm = 79.7406, GNorm = 0.5176, lr_0 = 3.3288e-04
Loss = 9.5947e-02, PNorm = 79.7472, GNorm = 0.5275, lr_0 = 3.3265e-04
Loss = 1.1974e-01, PNorm = 79.7539, GNorm = 0.8716, lr_0 = 3.3243e-04
Loss = 9.4427e-02, PNorm = 79.7632, GNorm = 0.5224, lr_0 = 3.3220e-04
Loss = 1.1475e-01, PNorm = 79.7708, GNorm = 0.7670, lr_0 = 3.3197e-04
Loss = 1.1378e-01, PNorm = 79.7797, GNorm = 0.7177, lr_0 = 3.3174e-04
Loss = 1.0227e-01, PNorm = 79.7857, GNorm = 0.7105, lr_0 = 3.3152e-04
Loss = 1.0406e-01, PNorm = 79.7933, GNorm = 0.7083, lr_0 = 3.3129e-04
Loss = 1.1268e-01, PNorm = 79.8018, GNorm = 0.8369, lr_0 = 3.3106e-04
Loss = 1.0214e-01, PNorm = 79.8101, GNorm = 0.6749, lr_0 = 3.3084e-04
Loss = 1.0773e-01, PNorm = 79.8220, GNorm = 0.6471, lr_0 = 3.3061e-04
Loss = 1.0439e-01, PNorm = 79.8344, GNorm = 0.7951, lr_0 = 3.3038e-04
Loss = 1.0234e-01, PNorm = 79.8438, GNorm = 0.6039, lr_0 = 3.3016e-04
Loss = 1.0081e-01, PNorm = 79.8518, GNorm = 0.6082, lr_0 = 3.2993e-04
Loss = 1.0382e-01, PNorm = 79.8606, GNorm = 0.7038, lr_0 = 3.2970e-04
Loss = 1.0354e-01, PNorm = 79.8672, GNorm = 0.5689, lr_0 = 3.2948e-04
Loss = 1.2231e-01, PNorm = 79.8749, GNorm = 0.7344, lr_0 = 3.2925e-04
Loss = 1.0348e-01, PNorm = 79.8828, GNorm = 0.6789, lr_0 = 3.2903e-04
Loss = 1.0409e-01, PNorm = 79.8883, GNorm = 0.7686, lr_0 = 3.2880e-04
Loss = 1.2322e-01, PNorm = 79.8971, GNorm = 0.6535, lr_0 = 3.2858e-04
Loss = 1.0367e-01, PNorm = 79.9071, GNorm = 0.5271, lr_0 = 3.2835e-04
Loss = 1.1225e-01, PNorm = 79.9138, GNorm = 0.7965, lr_0 = 3.2813e-04
Loss = 1.1292e-01, PNorm = 79.9224, GNorm = 0.7104, lr_0 = 3.2790e-04
Loss = 1.0219e-01, PNorm = 79.9291, GNorm = 0.5532, lr_0 = 3.2768e-04
Loss = 9.4633e-02, PNorm = 79.9323, GNorm = 0.4485, lr_0 = 3.2745e-04
Loss = 9.3471e-02, PNorm = 79.9361, GNorm = 0.6918, lr_0 = 3.2723e-04
Loss = 1.0954e-01, PNorm = 79.9420, GNorm = 0.6340, lr_0 = 3.2700e-04
Loss = 1.0993e-01, PNorm = 79.9441, GNorm = 0.6097, lr_0 = 3.2678e-04
Loss = 1.0137e-01, PNorm = 79.9518, GNorm = 0.6184, lr_0 = 3.2656e-04
Loss = 9.6894e-02, PNorm = 79.9564, GNorm = 0.5748, lr_0 = 3.2633e-04
Loss = 1.0610e-01, PNorm = 79.9631, GNorm = 0.8026, lr_0 = 3.2611e-04
Loss = 1.1894e-01, PNorm = 79.9666, GNorm = 0.6619, lr_0 = 3.2589e-04
Loss = 1.0799e-01, PNorm = 79.9731, GNorm = 0.9388, lr_0 = 3.2566e-04
Loss = 1.0644e-01, PNorm = 79.9781, GNorm = 0.8657, lr_0 = 3.2544e-04
Loss = 1.1404e-01, PNorm = 79.9886, GNorm = 0.6838, lr_0 = 3.2522e-04
Loss = 1.0989e-01, PNorm = 79.9942, GNorm = 1.1542, lr_0 = 3.2499e-04
Loss = 1.0423e-01, PNorm = 79.9995, GNorm = 0.7388, lr_0 = 3.2477e-04
Loss = 1.0352e-01, PNorm = 80.0079, GNorm = 0.6001, lr_0 = 3.2455e-04
Loss = 1.1060e-01, PNorm = 80.0133, GNorm = 0.8792, lr_0 = 3.2433e-04
Loss = 1.1825e-01, PNorm = 80.0205, GNorm = 0.6430, lr_0 = 3.2410e-04
Loss = 1.0151e-01, PNorm = 80.0282, GNorm = 0.8626, lr_0 = 3.2388e-04
Loss = 1.0439e-01, PNorm = 80.0351, GNorm = 0.5463, lr_0 = 3.2366e-04
Loss = 1.0521e-01, PNorm = 80.0448, GNorm = 0.6262, lr_0 = 3.2344e-04
Loss = 1.0402e-01, PNorm = 80.0511, GNorm = 0.6473, lr_0 = 3.2322e-04
Loss = 1.0891e-01, PNorm = 80.0572, GNorm = 0.5408, lr_0 = 3.2300e-04
Loss = 9.5660e-02, PNorm = 80.0667, GNorm = 0.8397, lr_0 = 3.2277e-04
Loss = 1.1761e-01, PNorm = 80.0767, GNorm = 0.6499, lr_0 = 3.2255e-04
Loss = 1.0802e-01, PNorm = 80.0865, GNorm = 0.6356, lr_0 = 3.2233e-04
Loss = 1.0780e-01, PNorm = 80.0954, GNorm = 0.7346, lr_0 = 3.2211e-04
Loss = 1.1511e-01, PNorm = 80.1036, GNorm = 0.9375, lr_0 = 3.2189e-04
Loss = 1.2655e-01, PNorm = 80.1100, GNorm = 0.9534, lr_0 = 3.2167e-04
Loss = 1.2475e-01, PNorm = 80.1177, GNorm = 0.8471, lr_0 = 3.2145e-04
Loss = 1.1909e-01, PNorm = 80.1245, GNorm = 0.6438, lr_0 = 3.2123e-04
Loss = 1.0461e-01, PNorm = 80.1327, GNorm = 0.4689, lr_0 = 3.2101e-04
Loss = 1.2348e-01, PNorm = 80.1391, GNorm = 0.6251, lr_0 = 3.2079e-04
Loss = 1.0798e-01, PNorm = 80.1490, GNorm = 0.6994, lr_0 = 3.2057e-04
Loss = 1.0692e-01, PNorm = 80.1559, GNorm = 0.9334, lr_0 = 3.2035e-04
Loss = 1.0862e-01, PNorm = 80.1591, GNorm = 0.9463, lr_0 = 3.2013e-04
Loss = 1.1342e-01, PNorm = 80.1666, GNorm = 1.0341, lr_0 = 3.1991e-04
Loss = 1.2455e-01, PNorm = 80.1719, GNorm = 0.6638, lr_0 = 3.1969e-04
Loss = 1.1093e-01, PNorm = 80.1744, GNorm = 0.5639, lr_0 = 3.1947e-04
Loss = 9.4408e-02, PNorm = 80.1802, GNorm = 0.8605, lr_0 = 3.1925e-04
Loss = 1.1033e-01, PNorm = 80.1875, GNorm = 0.6742, lr_0 = 3.1904e-04
Loss = 1.1169e-01, PNorm = 80.1938, GNorm = 0.5628, lr_0 = 3.1882e-04
Loss = 1.1517e-01, PNorm = 80.2023, GNorm = 0.7791, lr_0 = 3.1860e-04
Loss = 1.0563e-01, PNorm = 80.2091, GNorm = 0.7936, lr_0 = 3.1838e-04
Loss = 1.0200e-01, PNorm = 80.2150, GNorm = 1.0572, lr_0 = 3.1816e-04
Loss = 1.1377e-01, PNorm = 80.2216, GNorm = 0.9617, lr_0 = 3.1794e-04
Loss = 1.2115e-01, PNorm = 80.2279, GNorm = 0.8722, lr_0 = 3.1773e-04
Loss = 1.0815e-01, PNorm = 80.2352, GNorm = 0.6887, lr_0 = 3.1751e-04
Loss = 9.3737e-02, PNorm = 80.2463, GNorm = 0.6949, lr_0 = 3.1729e-04
Loss = 1.0467e-01, PNorm = 80.2545, GNorm = 0.7064, lr_0 = 3.1707e-04
Loss = 1.1356e-01, PNorm = 80.2600, GNorm = 0.7866, lr_0 = 3.1686e-04
Loss = 1.0821e-01, PNorm = 80.2626, GNorm = 0.8751, lr_0 = 3.1664e-04
Loss = 1.0624e-01, PNorm = 80.2719, GNorm = 0.5590, lr_0 = 3.1642e-04
Loss = 1.1874e-01, PNorm = 80.2801, GNorm = 0.6737, lr_0 = 3.1621e-04
Validation mae = 0.230361
Epoch 16
Loss = 1.0492e-01, PNorm = 80.2921, GNorm = 0.7574, lr_0 = 3.1599e-04
Loss = 9.8582e-02, PNorm = 80.2994, GNorm = 0.7281, lr_0 = 3.1577e-04
Loss = 9.7237e-02, PNorm = 80.3090, GNorm = 0.6075, lr_0 = 3.1556e-04
Loss = 9.7893e-02, PNorm = 80.3150, GNorm = 0.6383, lr_0 = 3.1534e-04
Loss = 9.1327e-02, PNorm = 80.3213, GNorm = 0.5336, lr_0 = 3.1512e-04
Loss = 9.4868e-02, PNorm = 80.3279, GNorm = 0.5699, lr_0 = 3.1491e-04
Loss = 9.9185e-02, PNorm = 80.3330, GNorm = 0.4978, lr_0 = 3.1469e-04
Loss = 9.7648e-02, PNorm = 80.3401, GNorm = 0.9084, lr_0 = 3.1448e-04
Loss = 1.0936e-01, PNorm = 80.3438, GNorm = 0.6142, lr_0 = 3.1426e-04
Loss = 1.0928e-01, PNorm = 80.3514, GNorm = 0.8212, lr_0 = 3.1405e-04
Loss = 1.0144e-01, PNorm = 80.3560, GNorm = 0.7200, lr_0 = 3.1383e-04
Loss = 1.0215e-01, PNorm = 80.3622, GNorm = 0.4393, lr_0 = 3.1362e-04
Loss = 1.0366e-01, PNorm = 80.3704, GNorm = 0.6778, lr_0 = 3.1340e-04
Loss = 9.7629e-02, PNorm = 80.3773, GNorm = 0.6013, lr_0 = 3.1319e-04
Loss = 1.1052e-01, PNorm = 80.3866, GNorm = 0.8440, lr_0 = 3.1297e-04
Loss = 1.0654e-01, PNorm = 80.3949, GNorm = 0.5982, lr_0 = 3.1276e-04
Loss = 9.4171e-02, PNorm = 80.3989, GNorm = 0.7962, lr_0 = 3.1254e-04
Loss = 9.9974e-02, PNorm = 80.4070, GNorm = 0.7050, lr_0 = 3.1233e-04
Loss = 1.0178e-01, PNorm = 80.4175, GNorm = 0.8195, lr_0 = 3.1212e-04
Loss = 1.0345e-01, PNorm = 80.4276, GNorm = 0.7843, lr_0 = 3.1190e-04
Loss = 8.9780e-02, PNorm = 80.4317, GNorm = 0.5038, lr_0 = 3.1169e-04
Loss = 1.0359e-01, PNorm = 80.4381, GNorm = 0.6826, lr_0 = 3.1147e-04
Loss = 1.0124e-01, PNorm = 80.4448, GNorm = 0.5484, lr_0 = 3.1126e-04
Loss = 9.9635e-02, PNorm = 80.4465, GNorm = 0.8008, lr_0 = 3.1105e-04
Loss = 9.4275e-02, PNorm = 80.4531, GNorm = 0.6704, lr_0 = 3.1083e-04
Loss = 9.3494e-02, PNorm = 80.4619, GNorm = 0.9675, lr_0 = 3.1062e-04
Loss = 9.9680e-02, PNorm = 80.4652, GNorm = 0.6918, lr_0 = 3.1041e-04
Loss = 9.6877e-02, PNorm = 80.4730, GNorm = 0.6795, lr_0 = 3.1020e-04
Loss = 9.0417e-02, PNorm = 80.4810, GNorm = 0.7433, lr_0 = 3.0998e-04
Loss = 1.0114e-01, PNorm = 80.4877, GNorm = 0.5716, lr_0 = 3.0977e-04
Loss = 1.1047e-01, PNorm = 80.4980, GNorm = 0.9197, lr_0 = 3.0956e-04
Loss = 1.1677e-01, PNorm = 80.5053, GNorm = 0.7669, lr_0 = 3.0935e-04
Loss = 9.9210e-02, PNorm = 80.5102, GNorm = 0.7056, lr_0 = 3.0914e-04
Loss = 1.0210e-01, PNorm = 80.5176, GNorm = 0.7782, lr_0 = 3.0892e-04
Loss = 1.1409e-01, PNorm = 80.5208, GNorm = 0.5938, lr_0 = 3.0871e-04
Loss = 1.0597e-01, PNorm = 80.5318, GNorm = 1.0944, lr_0 = 3.0850e-04
Loss = 1.0445e-01, PNorm = 80.5353, GNorm = 0.6870, lr_0 = 3.0829e-04
Loss = 1.2704e-01, PNorm = 80.5421, GNorm = 0.7384, lr_0 = 3.0808e-04
Loss = 9.5045e-02, PNorm = 80.5490, GNorm = 0.6723, lr_0 = 3.0787e-04
Loss = 1.0685e-01, PNorm = 80.5506, GNorm = 0.7659, lr_0 = 3.0766e-04
Loss = 1.0673e-01, PNorm = 80.5587, GNorm = 0.5232, lr_0 = 3.0745e-04
Loss = 1.0464e-01, PNorm = 80.5646, GNorm = 0.6102, lr_0 = 3.0723e-04
Loss = 1.1132e-01, PNorm = 80.5702, GNorm = 0.5429, lr_0 = 3.0702e-04
Loss = 1.0026e-01, PNorm = 80.5753, GNorm = 0.5440, lr_0 = 3.0681e-04
Loss = 1.1304e-01, PNorm = 80.5822, GNorm = 0.6192, lr_0 = 3.0660e-04
Loss = 1.0540e-01, PNorm = 80.5856, GNorm = 0.6347, lr_0 = 3.0639e-04
Loss = 1.1205e-01, PNorm = 80.5907, GNorm = 0.5820, lr_0 = 3.0618e-04
Loss = 9.8876e-02, PNorm = 80.6003, GNorm = 0.7646, lr_0 = 3.0597e-04
Loss = 1.1226e-01, PNorm = 80.6065, GNorm = 0.6287, lr_0 = 3.0576e-04
Loss = 1.0584e-01, PNorm = 80.6169, GNorm = 0.6749, lr_0 = 3.0555e-04
Loss = 9.1635e-02, PNorm = 80.6260, GNorm = 0.5845, lr_0 = 3.0535e-04
Loss = 1.1591e-01, PNorm = 80.6324, GNorm = 0.8467, lr_0 = 3.0514e-04
Loss = 1.0386e-01, PNorm = 80.6399, GNorm = 0.5340, lr_0 = 3.0493e-04
Loss = 1.0056e-01, PNorm = 80.6449, GNorm = 0.5171, lr_0 = 3.0472e-04
Loss = 9.7642e-02, PNorm = 80.6500, GNorm = 0.4280, lr_0 = 3.0451e-04
Loss = 1.0435e-01, PNorm = 80.6590, GNorm = 1.0645, lr_0 = 3.0430e-04
Loss = 1.1145e-01, PNorm = 80.6659, GNorm = 1.1545, lr_0 = 3.0409e-04
Loss = 9.4310e-02, PNorm = 80.6707, GNorm = 0.4841, lr_0 = 3.0388e-04
Loss = 1.0566e-01, PNorm = 80.6755, GNorm = 0.5229, lr_0 = 3.0368e-04
Loss = 1.0443e-01, PNorm = 80.6819, GNorm = 0.5654, lr_0 = 3.0347e-04
Loss = 9.5140e-02, PNorm = 80.6873, GNorm = 0.6701, lr_0 = 3.0326e-04
Loss = 1.1428e-01, PNorm = 80.6912, GNorm = 0.6637, lr_0 = 3.0305e-04
Loss = 1.0653e-01, PNorm = 80.6974, GNorm = 0.6778, lr_0 = 3.0284e-04
Loss = 1.1188e-01, PNorm = 80.7052, GNorm = 0.5758, lr_0 = 3.0264e-04
Loss = 9.1055e-02, PNorm = 80.7158, GNorm = 0.5290, lr_0 = 3.0243e-04
Loss = 9.3658e-02, PNorm = 80.7254, GNorm = 0.5294, lr_0 = 3.0222e-04
Loss = 9.5214e-02, PNorm = 80.7341, GNorm = 0.7779, lr_0 = 3.0202e-04
Loss = 1.1471e-01, PNorm = 80.7377, GNorm = 0.5686, lr_0 = 3.0181e-04
Loss = 9.6288e-02, PNorm = 80.7443, GNorm = 0.5394, lr_0 = 3.0160e-04
Loss = 1.0112e-01, PNorm = 80.7487, GNorm = 0.6242, lr_0 = 3.0140e-04
Loss = 9.8612e-02, PNorm = 80.7522, GNorm = 0.8758, lr_0 = 3.0119e-04
Loss = 1.0082e-01, PNorm = 80.7567, GNorm = 0.5218, lr_0 = 3.0098e-04
Loss = 1.0297e-01, PNorm = 80.7615, GNorm = 0.6383, lr_0 = 3.0078e-04
Loss = 1.1306e-01, PNorm = 80.7693, GNorm = 0.5414, lr_0 = 3.0057e-04
Loss = 9.5616e-02, PNorm = 80.7769, GNorm = 0.5394, lr_0 = 3.0036e-04
Loss = 9.0067e-02, PNorm = 80.7870, GNorm = 0.5068, lr_0 = 3.0016e-04
Loss = 1.0290e-01, PNorm = 80.7966, GNorm = 0.8536, lr_0 = 2.9995e-04
Loss = 1.0346e-01, PNorm = 80.7979, GNorm = 0.6163, lr_0 = 2.9975e-04
Loss = 1.0719e-01, PNorm = 80.8028, GNorm = 0.7491, lr_0 = 2.9954e-04
Loss = 1.1848e-01, PNorm = 80.8043, GNorm = 0.7368, lr_0 = 2.9934e-04
Loss = 1.0338e-01, PNorm = 80.8083, GNorm = 0.7814, lr_0 = 2.9913e-04
Loss = 9.3488e-02, PNorm = 80.8121, GNorm = 0.5759, lr_0 = 2.9893e-04
Loss = 1.1010e-01, PNorm = 80.8187, GNorm = 0.8374, lr_0 = 2.9872e-04
Loss = 9.8097e-02, PNorm = 80.8255, GNorm = 1.0105, lr_0 = 2.9852e-04
Loss = 1.0310e-01, PNorm = 80.8343, GNorm = 0.5671, lr_0 = 2.9831e-04
Loss = 1.0564e-01, PNorm = 80.8423, GNorm = 0.6336, lr_0 = 2.9811e-04
Loss = 1.1497e-01, PNorm = 80.8483, GNorm = 0.7520, lr_0 = 2.9790e-04
Loss = 9.1416e-02, PNorm = 80.8549, GNorm = 0.8725, lr_0 = 2.9770e-04
Loss = 1.0705e-01, PNorm = 80.8612, GNorm = 0.9912, lr_0 = 2.9750e-04
Loss = 1.0065e-01, PNorm = 80.8701, GNorm = 0.7347, lr_0 = 2.9729e-04
Loss = 1.1014e-01, PNorm = 80.8819, GNorm = 1.1149, lr_0 = 2.9709e-04
Loss = 1.1411e-01, PNorm = 80.8905, GNorm = 0.5320, lr_0 = 2.9689e-04
Loss = 1.1574e-01, PNorm = 80.9012, GNorm = 0.7454, lr_0 = 2.9668e-04
Loss = 9.3591e-02, PNorm = 80.9076, GNorm = 0.6666, lr_0 = 2.9648e-04
Loss = 9.5024e-02, PNorm = 80.9140, GNorm = 0.7710, lr_0 = 2.9628e-04
Loss = 1.0048e-01, PNorm = 80.9186, GNorm = 0.5319, lr_0 = 2.9607e-04
Loss = 9.4665e-02, PNorm = 80.9188, GNorm = 0.7567, lr_0 = 2.9587e-04
Loss = 1.1382e-01, PNorm = 80.9252, GNorm = 0.7513, lr_0 = 2.9567e-04
Loss = 1.0755e-01, PNorm = 80.9288, GNorm = 0.8828, lr_0 = 2.9546e-04
Loss = 1.0537e-01, PNorm = 80.9319, GNorm = 0.8435, lr_0 = 2.9526e-04
Loss = 9.5605e-02, PNorm = 80.9390, GNorm = 0.7414, lr_0 = 2.9506e-04
Loss = 1.0062e-01, PNorm = 80.9428, GNorm = 0.5105, lr_0 = 2.9486e-04
Loss = 9.7845e-02, PNorm = 80.9510, GNorm = 0.6333, lr_0 = 2.9466e-04
Loss = 1.0821e-01, PNorm = 80.9599, GNorm = 0.9806, lr_0 = 2.9445e-04
Loss = 1.1080e-01, PNorm = 80.9633, GNorm = 0.5979, lr_0 = 2.9425e-04
Loss = 9.9374e-02, PNorm = 80.9680, GNorm = 0.6155, lr_0 = 2.9405e-04
Loss = 1.0583e-01, PNorm = 80.9676, GNorm = 0.8686, lr_0 = 2.9385e-04
Loss = 1.1192e-01, PNorm = 80.9757, GNorm = 0.6955, lr_0 = 2.9365e-04
Loss = 1.1364e-01, PNorm = 80.9825, GNorm = 0.5467, lr_0 = 2.9345e-04
Loss = 1.0832e-01, PNorm = 80.9904, GNorm = 0.6702, lr_0 = 2.9325e-04
Loss = 1.0266e-01, PNorm = 80.9955, GNorm = 0.7351, lr_0 = 2.9305e-04
Loss = 1.0943e-01, PNorm = 81.0011, GNorm = 0.5239, lr_0 = 2.9284e-04
Loss = 1.0469e-01, PNorm = 81.0117, GNorm = 0.4612, lr_0 = 2.9264e-04
Loss = 1.0301e-01, PNorm = 81.0181, GNorm = 0.5571, lr_0 = 2.9244e-04
Loss = 9.6520e-02, PNorm = 81.0222, GNorm = 0.5181, lr_0 = 2.9224e-04
Loss = 9.1533e-02, PNorm = 81.0272, GNorm = 0.5814, lr_0 = 2.9204e-04
Loss = 1.2350e-01, PNorm = 81.0324, GNorm = 0.7724, lr_0 = 2.9184e-04
Loss = 1.0734e-01, PNorm = 81.0407, GNorm = 0.7161, lr_0 = 2.9164e-04
Loss = 1.1944e-01, PNorm = 81.0458, GNorm = 0.9893, lr_0 = 2.9144e-04
Loss = 1.0858e-01, PNorm = 81.0492, GNorm = 0.5984, lr_0 = 2.9124e-04
Validation mae = 0.229567
Epoch 17
Loss = 9.2869e-02, PNorm = 81.0520, GNorm = 0.5920, lr_0 = 2.9104e-04
Loss = 1.0282e-01, PNorm = 81.0602, GNorm = 0.6965, lr_0 = 2.9084e-04
Loss = 9.5867e-02, PNorm = 81.0689, GNorm = 0.6141, lr_0 = 2.9065e-04
Loss = 8.5631e-02, PNorm = 81.0783, GNorm = 0.5495, lr_0 = 2.9045e-04
Loss = 9.0202e-02, PNorm = 81.0845, GNorm = 0.7936, lr_0 = 2.9025e-04
Loss = 9.9798e-02, PNorm = 81.0894, GNorm = 0.5178, lr_0 = 2.9005e-04
Loss = 1.0495e-01, PNorm = 81.0981, GNorm = 0.7251, lr_0 = 2.8985e-04
Loss = 9.0204e-02, PNorm = 81.1076, GNorm = 0.5479, lr_0 = 2.8965e-04
Loss = 9.3619e-02, PNorm = 81.1154, GNorm = 0.6045, lr_0 = 2.8945e-04
Loss = 1.0114e-01, PNorm = 81.1194, GNorm = 0.7590, lr_0 = 2.8925e-04
Loss = 1.0509e-01, PNorm = 81.1205, GNorm = 0.5604, lr_0 = 2.8906e-04
Loss = 1.0464e-01, PNorm = 81.1272, GNorm = 0.6243, lr_0 = 2.8886e-04
Loss = 1.0167e-01, PNorm = 81.1356, GNorm = 0.6464, lr_0 = 2.8866e-04
Loss = 1.0062e-01, PNorm = 81.1392, GNorm = 0.5718, lr_0 = 2.8846e-04
Loss = 8.3517e-02, PNorm = 81.1436, GNorm = 0.6755, lr_0 = 2.8826e-04
Loss = 9.7850e-02, PNorm = 81.1468, GNorm = 0.8253, lr_0 = 2.8807e-04
Loss = 1.0069e-01, PNorm = 81.1555, GNorm = 0.6983, lr_0 = 2.8787e-04
Loss = 1.0003e-01, PNorm = 81.1641, GNorm = 0.5585, lr_0 = 2.8767e-04
Loss = 8.9793e-02, PNorm = 81.1714, GNorm = 0.8203, lr_0 = 2.8748e-04
Loss = 8.4040e-02, PNorm = 81.1824, GNorm = 0.7204, lr_0 = 2.8728e-04
Loss = 1.0013e-01, PNorm = 81.1918, GNorm = 0.7964, lr_0 = 2.8708e-04
Loss = 1.0367e-01, PNorm = 81.2025, GNorm = 0.5253, lr_0 = 2.8689e-04
Loss = 1.0857e-01, PNorm = 81.2107, GNorm = 0.6328, lr_0 = 2.8669e-04
Loss = 1.0238e-01, PNorm = 81.2165, GNorm = 0.9314, lr_0 = 2.8649e-04
Loss = 1.0193e-01, PNorm = 81.2191, GNorm = 0.5585, lr_0 = 2.8630e-04
Loss = 1.0108e-01, PNorm = 81.2251, GNorm = 0.6325, lr_0 = 2.8610e-04
Loss = 9.8051e-02, PNorm = 81.2301, GNorm = 0.6696, lr_0 = 2.8590e-04
Loss = 1.0081e-01, PNorm = 81.2366, GNorm = 0.6290, lr_0 = 2.8571e-04
Loss = 1.0688e-01, PNorm = 81.2438, GNorm = 0.6934, lr_0 = 2.8551e-04
Loss = 8.6683e-02, PNorm = 81.2478, GNorm = 0.7260, lr_0 = 2.8532e-04
Loss = 9.3428e-02, PNorm = 81.2504, GNorm = 0.7497, lr_0 = 2.8512e-04
Loss = 1.0017e-01, PNorm = 81.2517, GNorm = 0.6474, lr_0 = 2.8493e-04
Loss = 1.0819e-01, PNorm = 81.2543, GNorm = 0.6194, lr_0 = 2.8473e-04
Loss = 1.2165e-01, PNorm = 81.2617, GNorm = 1.0752, lr_0 = 2.8454e-04
Loss = 1.0532e-01, PNorm = 81.2685, GNorm = 0.9310, lr_0 = 2.8434e-04
Loss = 9.2244e-02, PNorm = 81.2739, GNorm = 0.8100, lr_0 = 2.8415e-04
Loss = 1.0756e-01, PNorm = 81.2840, GNorm = 1.0547, lr_0 = 2.8395e-04
Loss = 1.1057e-01, PNorm = 81.2934, GNorm = 0.6602, lr_0 = 2.8376e-04
Loss = 9.7998e-02, PNorm = 81.2988, GNorm = 0.7772, lr_0 = 2.8356e-04
Loss = 9.4583e-02, PNorm = 81.3038, GNorm = 0.6837, lr_0 = 2.8337e-04
Loss = 1.0570e-01, PNorm = 81.3104, GNorm = 1.1042, lr_0 = 2.8317e-04
Loss = 1.0051e-01, PNorm = 81.3139, GNorm = 0.7104, lr_0 = 2.8298e-04
Loss = 9.9681e-02, PNorm = 81.3181, GNorm = 0.8498, lr_0 = 2.8279e-04
Loss = 9.9293e-02, PNorm = 81.3214, GNorm = 0.8560, lr_0 = 2.8259e-04
Loss = 1.0175e-01, PNorm = 81.3307, GNorm = 0.8899, lr_0 = 2.8240e-04
Loss = 1.1200e-01, PNorm = 81.3393, GNorm = 0.8875, lr_0 = 2.8221e-04
Loss = 9.7528e-02, PNorm = 81.3469, GNorm = 0.6452, lr_0 = 2.8201e-04
Loss = 1.0497e-01, PNorm = 81.3554, GNorm = 0.7662, lr_0 = 2.8182e-04
Loss = 1.0553e-01, PNorm = 81.3562, GNorm = 0.7985, lr_0 = 2.8163e-04
Loss = 1.0825e-01, PNorm = 81.3615, GNorm = 0.8241, lr_0 = 2.8143e-04
Loss = 1.0442e-01, PNorm = 81.3660, GNorm = 0.4954, lr_0 = 2.8124e-04
Loss = 1.0954e-01, PNorm = 81.3697, GNorm = 0.8243, lr_0 = 2.8105e-04
Loss = 1.0001e-01, PNorm = 81.3780, GNorm = 1.1867, lr_0 = 2.8085e-04
Loss = 1.0239e-01, PNorm = 81.3844, GNorm = 0.8460, lr_0 = 2.8066e-04
Loss = 1.1004e-01, PNorm = 81.3891, GNorm = 0.8942, lr_0 = 2.8047e-04
Loss = 1.0602e-01, PNorm = 81.3909, GNorm = 0.7787, lr_0 = 2.8028e-04
Loss = 1.0345e-01, PNorm = 81.3950, GNorm = 0.5820, lr_0 = 2.8009e-04
Loss = 9.2504e-02, PNorm = 81.4018, GNorm = 0.5105, lr_0 = 2.7989e-04
Loss = 1.0231e-01, PNorm = 81.4035, GNorm = 0.8799, lr_0 = 2.7970e-04
Loss = 1.0352e-01, PNorm = 81.4070, GNorm = 0.5438, lr_0 = 2.7951e-04
Loss = 1.0083e-01, PNorm = 81.4138, GNorm = 0.7287, lr_0 = 2.7932e-04
Loss = 1.0887e-01, PNorm = 81.4202, GNorm = 0.6404, lr_0 = 2.7913e-04
Loss = 9.5176e-02, PNorm = 81.4250, GNorm = 0.5566, lr_0 = 2.7894e-04
Loss = 1.0179e-01, PNorm = 81.4264, GNorm = 0.5162, lr_0 = 2.7875e-04
Loss = 8.5039e-02, PNorm = 81.4325, GNorm = 0.4466, lr_0 = 2.7855e-04
Loss = 9.8366e-02, PNorm = 81.4402, GNorm = 0.6288, lr_0 = 2.7836e-04
Loss = 1.0103e-01, PNorm = 81.4444, GNorm = 0.6430, lr_0 = 2.7817e-04
Loss = 1.0465e-01, PNorm = 81.4543, GNorm = 0.8270, lr_0 = 2.7798e-04
Loss = 1.0228e-01, PNorm = 81.4597, GNorm = 0.7694, lr_0 = 2.7779e-04
Loss = 1.2343e-01, PNorm = 81.4649, GNorm = 0.6115, lr_0 = 2.7760e-04
Loss = 8.8343e-02, PNorm = 81.4747, GNorm = 0.4992, lr_0 = 2.7741e-04
Loss = 1.0721e-01, PNorm = 81.4822, GNorm = 1.3067, lr_0 = 2.7722e-04
Loss = 1.0142e-01, PNorm = 81.4860, GNorm = 0.5537, lr_0 = 2.7703e-04
Loss = 1.0757e-01, PNorm = 81.4911, GNorm = 0.7414, lr_0 = 2.7684e-04
Loss = 1.0291e-01, PNorm = 81.4985, GNorm = 0.9622, lr_0 = 2.7665e-04
Loss = 1.0279e-01, PNorm = 81.5057, GNorm = 0.4296, lr_0 = 2.7646e-04
Loss = 1.0070e-01, PNorm = 81.5133, GNorm = 0.8552, lr_0 = 2.7627e-04
Loss = 1.0633e-01, PNorm = 81.5158, GNorm = 0.6255, lr_0 = 2.7608e-04
Loss = 9.6439e-02, PNorm = 81.5217, GNorm = 0.9159, lr_0 = 2.7590e-04
Loss = 8.5754e-02, PNorm = 81.5259, GNorm = 0.7073, lr_0 = 2.7571e-04
Loss = 1.0437e-01, PNorm = 81.5322, GNorm = 0.8249, lr_0 = 2.7552e-04
Loss = 1.0835e-01, PNorm = 81.5400, GNorm = 0.7056, lr_0 = 2.7533e-04
Loss = 9.8850e-02, PNorm = 81.5422, GNorm = 0.9658, lr_0 = 2.7514e-04
Loss = 1.0526e-01, PNorm = 81.5443, GNorm = 0.7665, lr_0 = 2.7495e-04
Loss = 1.0267e-01, PNorm = 81.5525, GNorm = 0.6389, lr_0 = 2.7476e-04
Loss = 8.8827e-02, PNorm = 81.5578, GNorm = 0.7354, lr_0 = 2.7457e-04
Loss = 9.7848e-02, PNorm = 81.5620, GNorm = 1.0000, lr_0 = 2.7439e-04
Loss = 9.6685e-02, PNorm = 81.5658, GNorm = 0.5804, lr_0 = 2.7420e-04
Loss = 8.7986e-02, PNorm = 81.5729, GNorm = 0.8025, lr_0 = 2.7401e-04
Loss = 1.0807e-01, PNorm = 81.5745, GNorm = 0.5245, lr_0 = 2.7382e-04
Loss = 1.1505e-01, PNorm = 81.5768, GNorm = 0.6147, lr_0 = 2.7364e-04
Loss = 8.9832e-02, PNorm = 81.5833, GNorm = 0.6009, lr_0 = 2.7345e-04
Loss = 1.0200e-01, PNorm = 81.5848, GNorm = 0.5969, lr_0 = 2.7326e-04
Loss = 8.7591e-02, PNorm = 81.5876, GNorm = 0.5347, lr_0 = 2.7307e-04
Loss = 9.3541e-02, PNorm = 81.5922, GNorm = 0.6698, lr_0 = 2.7289e-04
Loss = 1.0204e-01, PNorm = 81.5973, GNorm = 0.5033, lr_0 = 2.7270e-04
Loss = 1.0910e-01, PNorm = 81.6045, GNorm = 0.6395, lr_0 = 2.7251e-04
Loss = 9.5141e-02, PNorm = 81.6127, GNorm = 0.6329, lr_0 = 2.7233e-04
Loss = 1.0763e-01, PNorm = 81.6171, GNorm = 0.6184, lr_0 = 2.7214e-04
Loss = 1.0945e-01, PNorm = 81.6232, GNorm = 0.5642, lr_0 = 2.7195e-04
Loss = 9.3033e-02, PNorm = 81.6285, GNorm = 0.6993, lr_0 = 2.7177e-04
Loss = 1.0292e-01, PNorm = 81.6293, GNorm = 0.6484, lr_0 = 2.7158e-04
Loss = 1.0545e-01, PNorm = 81.6339, GNorm = 0.6627, lr_0 = 2.7139e-04
Loss = 1.0335e-01, PNorm = 81.6371, GNorm = 0.7302, lr_0 = 2.7121e-04
Loss = 1.0651e-01, PNorm = 81.6476, GNorm = 0.5819, lr_0 = 2.7102e-04
Loss = 9.2065e-02, PNorm = 81.6554, GNorm = 0.5936, lr_0 = 2.7084e-04
Loss = 9.5815e-02, PNorm = 81.6581, GNorm = 0.5982, lr_0 = 2.7065e-04
Loss = 9.7801e-02, PNorm = 81.6615, GNorm = 0.6594, lr_0 = 2.7047e-04
Loss = 9.9612e-02, PNorm = 81.6658, GNorm = 0.7546, lr_0 = 2.7028e-04
Loss = 1.0464e-01, PNorm = 81.6726, GNorm = 0.6525, lr_0 = 2.7010e-04
Loss = 1.2402e-01, PNorm = 81.6817, GNorm = 0.6913, lr_0 = 2.6991e-04
Loss = 1.0918e-01, PNorm = 81.6883, GNorm = 0.6956, lr_0 = 2.6973e-04
Loss = 9.4565e-02, PNorm = 81.6939, GNorm = 0.6387, lr_0 = 2.6954e-04
Loss = 1.0315e-01, PNorm = 81.6973, GNorm = 0.7383, lr_0 = 2.6936e-04
Loss = 1.0507e-01, PNorm = 81.7013, GNorm = 0.6884, lr_0 = 2.6917e-04
Loss = 8.2463e-02, PNorm = 81.7065, GNorm = 0.7446, lr_0 = 2.6899e-04
Loss = 1.0300e-01, PNorm = 81.7137, GNorm = 0.7180, lr_0 = 2.6880e-04
Loss = 1.0613e-01, PNorm = 81.7195, GNorm = 0.5623, lr_0 = 2.6862e-04
Loss = 1.1153e-01, PNorm = 81.7246, GNorm = 0.6396, lr_0 = 2.6844e-04
Loss = 1.0027e-01, PNorm = 81.7293, GNorm = 0.7422, lr_0 = 2.6825e-04
Validation mae = 0.230275
Epoch 18
Loss = 8.6967e-02, PNorm = 81.7338, GNorm = 1.0512, lr_0 = 2.6807e-04
Loss = 9.0697e-02, PNorm = 81.7393, GNorm = 0.5585, lr_0 = 2.6788e-04
Loss = 9.7222e-02, PNorm = 81.7419, GNorm = 0.6446, lr_0 = 2.6770e-04
Loss = 8.6190e-02, PNorm = 81.7469, GNorm = 0.7644, lr_0 = 2.6752e-04
Loss = 8.9203e-02, PNorm = 81.7542, GNorm = 0.5329, lr_0 = 2.6733e-04
Loss = 9.7604e-02, PNorm = 81.7573, GNorm = 0.6352, lr_0 = 2.6715e-04
Loss = 8.7404e-02, PNorm = 81.7635, GNorm = 0.7260, lr_0 = 2.6697e-04
Loss = 1.0076e-01, PNorm = 81.7712, GNorm = 0.5927, lr_0 = 2.6678e-04
Loss = 9.1875e-02, PNorm = 81.7801, GNorm = 0.6303, lr_0 = 2.6660e-04
Loss = 8.4402e-02, PNorm = 81.7864, GNorm = 0.5439, lr_0 = 2.6642e-04
Loss = 9.9053e-02, PNorm = 81.7899, GNorm = 0.6063, lr_0 = 2.6624e-04
Loss = 1.1376e-01, PNorm = 81.7968, GNorm = 0.8365, lr_0 = 2.6605e-04
Loss = 8.3858e-02, PNorm = 81.8044, GNorm = 0.6149, lr_0 = 2.6587e-04
Loss = 9.6460e-02, PNorm = 81.8089, GNorm = 0.5603, lr_0 = 2.6569e-04
Loss = 1.0432e-01, PNorm = 81.8171, GNorm = 0.8240, lr_0 = 2.6551e-04
Loss = 9.3468e-02, PNorm = 81.8257, GNorm = 0.6113, lr_0 = 2.6533e-04
Loss = 9.3242e-02, PNorm = 81.8301, GNorm = 0.6090, lr_0 = 2.6514e-04
Loss = 8.4492e-02, PNorm = 81.8359, GNorm = 0.5679, lr_0 = 2.6496e-04
Loss = 9.0070e-02, PNorm = 81.8411, GNorm = 0.5703, lr_0 = 2.6478e-04
Loss = 1.0394e-01, PNorm = 81.8438, GNorm = 0.8821, lr_0 = 2.6460e-04
Loss = 9.7696e-02, PNorm = 81.8491, GNorm = 0.6229, lr_0 = 2.6442e-04
Loss = 8.8274e-02, PNorm = 81.8495, GNorm = 0.7562, lr_0 = 2.6424e-04
Loss = 8.6777e-02, PNorm = 81.8519, GNorm = 0.6094, lr_0 = 2.6406e-04
Loss = 9.6414e-02, PNorm = 81.8556, GNorm = 0.8006, lr_0 = 2.6388e-04
Loss = 1.0089e-01, PNorm = 81.8619, GNorm = 0.6622, lr_0 = 2.6369e-04
Loss = 9.6583e-02, PNorm = 81.8671, GNorm = 0.7338, lr_0 = 2.6351e-04
Loss = 1.1530e-01, PNorm = 81.8736, GNorm = 0.6900, lr_0 = 2.6333e-04
Loss = 9.8341e-02, PNorm = 81.8785, GNorm = 0.8192, lr_0 = 2.6315e-04
Loss = 1.1491e-01, PNorm = 81.8859, GNorm = 0.9912, lr_0 = 2.6297e-04
Loss = 8.6910e-02, PNorm = 81.8920, GNorm = 0.5936, lr_0 = 2.6279e-04
Loss = 8.3435e-02, PNorm = 81.8943, GNorm = 0.5367, lr_0 = 2.6261e-04
Loss = 1.0669e-01, PNorm = 81.9012, GNorm = 0.8346, lr_0 = 2.6243e-04
Loss = 1.0335e-01, PNorm = 81.9064, GNorm = 0.5358, lr_0 = 2.6225e-04
Loss = 1.2041e-01, PNorm = 81.9171, GNorm = 0.9994, lr_0 = 2.6207e-04
Loss = 8.7470e-02, PNorm = 81.9224, GNorm = 0.7627, lr_0 = 2.6189e-04
Loss = 9.5278e-02, PNorm = 81.9288, GNorm = 0.6827, lr_0 = 2.6171e-04
Loss = 9.8384e-02, PNorm = 81.9347, GNorm = 0.5082, lr_0 = 2.6153e-04
Loss = 9.6141e-02, PNorm = 81.9404, GNorm = 0.5592, lr_0 = 2.6136e-04
Loss = 8.2870e-02, PNorm = 81.9442, GNorm = 0.6674, lr_0 = 2.6118e-04
Loss = 9.7517e-02, PNorm = 81.9499, GNorm = 0.7469, lr_0 = 2.6100e-04
Loss = 9.5181e-02, PNorm = 81.9552, GNorm = 0.5570, lr_0 = 2.6082e-04
Loss = 9.1785e-02, PNorm = 81.9614, GNorm = 0.5736, lr_0 = 2.6064e-04
Loss = 9.6360e-02, PNorm = 81.9633, GNorm = 0.5331, lr_0 = 2.6046e-04
Loss = 8.7514e-02, PNorm = 81.9693, GNorm = 0.5753, lr_0 = 2.6028e-04
Loss = 1.0189e-01, PNorm = 81.9772, GNorm = 0.6548, lr_0 = 2.6011e-04
Loss = 1.0228e-01, PNorm = 81.9800, GNorm = 0.5747, lr_0 = 2.5993e-04
Loss = 9.5890e-02, PNorm = 81.9839, GNorm = 0.7752, lr_0 = 2.5975e-04
Loss = 1.0406e-01, PNorm = 81.9880, GNorm = 0.9030, lr_0 = 2.5957e-04
Loss = 1.0457e-01, PNorm = 81.9895, GNorm = 0.7906, lr_0 = 2.5939e-04
Loss = 9.9156e-02, PNorm = 81.9953, GNorm = 0.7359, lr_0 = 2.5922e-04
Loss = 9.6672e-02, PNorm = 82.0012, GNorm = 0.4139, lr_0 = 2.5904e-04
Loss = 1.0922e-01, PNorm = 82.0058, GNorm = 0.5154, lr_0 = 2.5886e-04
Loss = 1.0319e-01, PNorm = 82.0105, GNorm = 0.5430, lr_0 = 2.5868e-04
Loss = 8.7875e-02, PNorm = 82.0153, GNorm = 0.7127, lr_0 = 2.5851e-04
Loss = 1.0549e-01, PNorm = 82.0214, GNorm = 0.8148, lr_0 = 2.5833e-04
Loss = 1.0030e-01, PNorm = 82.0291, GNorm = 0.6488, lr_0 = 2.5815e-04
Loss = 9.2799e-02, PNorm = 82.0339, GNorm = 0.9948, lr_0 = 2.5797e-04
Loss = 9.8338e-02, PNorm = 82.0388, GNorm = 0.6893, lr_0 = 2.5780e-04
Loss = 1.0977e-01, PNorm = 82.0465, GNorm = 0.6272, lr_0 = 2.5762e-04
Loss = 9.4114e-02, PNorm = 82.0468, GNorm = 0.4016, lr_0 = 2.5745e-04
Loss = 8.8632e-02, PNorm = 82.0513, GNorm = 0.5966, lr_0 = 2.5727e-04
Loss = 9.1722e-02, PNorm = 82.0568, GNorm = 0.5437, lr_0 = 2.5709e-04
Loss = 1.0034e-01, PNorm = 82.0630, GNorm = 0.8178, lr_0 = 2.5692e-04
Loss = 1.0852e-01, PNorm = 82.0650, GNorm = 0.5251, lr_0 = 2.5674e-04
Loss = 9.1946e-02, PNorm = 82.0715, GNorm = 0.9944, lr_0 = 2.5656e-04
Loss = 1.0188e-01, PNorm = 82.0749, GNorm = 0.5115, lr_0 = 2.5639e-04
Loss = 8.1801e-02, PNorm = 82.0785, GNorm = 0.5440, lr_0 = 2.5621e-04
Loss = 9.9620e-02, PNorm = 82.0827, GNorm = 0.5475, lr_0 = 2.5604e-04
Loss = 1.0012e-01, PNorm = 82.0863, GNorm = 0.7837, lr_0 = 2.5586e-04
Loss = 9.5306e-02, PNorm = 82.0929, GNorm = 0.6298, lr_0 = 2.5569e-04
Loss = 1.0254e-01, PNorm = 82.1007, GNorm = 0.8723, lr_0 = 2.5551e-04
Loss = 8.5671e-02, PNorm = 82.1059, GNorm = 0.4952, lr_0 = 2.5534e-04
Loss = 1.0146e-01, PNorm = 82.1124, GNorm = 0.5846, lr_0 = 2.5516e-04
Loss = 9.7560e-02, PNorm = 82.1181, GNorm = 0.6691, lr_0 = 2.5499e-04
Loss = 8.3870e-02, PNorm = 82.1244, GNorm = 0.6624, lr_0 = 2.5481e-04
Loss = 9.1378e-02, PNorm = 82.1261, GNorm = 0.9724, lr_0 = 2.5464e-04
Loss = 1.0815e-01, PNorm = 82.1305, GNorm = 0.7577, lr_0 = 2.5446e-04
Loss = 1.0152e-01, PNorm = 82.1343, GNorm = 0.8687, lr_0 = 2.5429e-04
Loss = 9.1845e-02, PNorm = 82.1377, GNorm = 0.4323, lr_0 = 2.5411e-04
Loss = 9.9076e-02, PNorm = 82.1409, GNorm = 0.6999, lr_0 = 2.5394e-04
Loss = 9.3431e-02, PNorm = 82.1454, GNorm = 0.6060, lr_0 = 2.5377e-04
Loss = 9.6868e-02, PNorm = 82.1486, GNorm = 0.5666, lr_0 = 2.5359e-04
Loss = 1.0428e-01, PNorm = 82.1542, GNorm = 0.6495, lr_0 = 2.5342e-04
Loss = 1.1334e-01, PNorm = 82.1609, GNorm = 0.5684, lr_0 = 2.5325e-04
Loss = 9.8335e-02, PNorm = 82.1657, GNorm = 0.4933, lr_0 = 2.5307e-04
Loss = 9.6877e-02, PNorm = 82.1718, GNorm = 0.7414, lr_0 = 2.5290e-04
Loss = 1.0576e-01, PNorm = 82.1768, GNorm = 0.6671, lr_0 = 2.5273e-04
Loss = 8.6991e-02, PNorm = 82.1828, GNorm = 0.7479, lr_0 = 2.5255e-04
Loss = 9.1397e-02, PNorm = 82.1899, GNorm = 0.7458, lr_0 = 2.5238e-04
Loss = 9.3682e-02, PNorm = 82.1947, GNorm = 0.5606, lr_0 = 2.5221e-04
Loss = 1.1177e-01, PNorm = 82.1975, GNorm = 0.8351, lr_0 = 2.5203e-04
Loss = 9.2516e-02, PNorm = 82.2012, GNorm = 0.6753, lr_0 = 2.5186e-04
Loss = 9.7045e-02, PNorm = 82.2071, GNorm = 0.4992, lr_0 = 2.5169e-04
Loss = 8.8891e-02, PNorm = 82.2111, GNorm = 0.7793, lr_0 = 2.5152e-04
Loss = 9.4174e-02, PNorm = 82.2133, GNorm = 0.9026, lr_0 = 2.5134e-04
Loss = 1.0897e-01, PNorm = 82.2169, GNorm = 0.7475, lr_0 = 2.5117e-04
Loss = 1.0128e-01, PNorm = 82.2213, GNorm = 0.8132, lr_0 = 2.5100e-04
Loss = 9.7432e-02, PNorm = 82.2241, GNorm = 0.7610, lr_0 = 2.5083e-04
Loss = 9.3905e-02, PNorm = 82.2279, GNorm = 0.6882, lr_0 = 2.5066e-04
Loss = 1.0416e-01, PNorm = 82.2357, GNorm = 0.5615, lr_0 = 2.5048e-04
Loss = 9.4893e-02, PNorm = 82.2393, GNorm = 0.5951, lr_0 = 2.5031e-04
Loss = 9.6095e-02, PNorm = 82.2436, GNorm = 0.5221, lr_0 = 2.5014e-04
Loss = 1.0462e-01, PNorm = 82.2539, GNorm = 0.8059, lr_0 = 2.4997e-04
Loss = 1.0144e-01, PNorm = 82.2588, GNorm = 0.6136, lr_0 = 2.4980e-04
Loss = 8.8652e-02, PNorm = 82.2630, GNorm = 0.7238, lr_0 = 2.4963e-04
Loss = 8.7033e-02, PNorm = 82.2683, GNorm = 1.1284, lr_0 = 2.4946e-04
Loss = 9.6401e-02, PNorm = 82.2741, GNorm = 0.6922, lr_0 = 2.4929e-04
Loss = 9.6890e-02, PNorm = 82.2756, GNorm = 0.7885, lr_0 = 2.4911e-04
Loss = 1.1510e-01, PNorm = 82.2772, GNorm = 0.7151, lr_0 = 2.4894e-04
Loss = 1.0018e-01, PNorm = 82.2812, GNorm = 0.8031, lr_0 = 2.4877e-04
Loss = 8.2125e-02, PNorm = 82.2872, GNorm = 0.5710, lr_0 = 2.4860e-04
Loss = 1.0298e-01, PNorm = 82.2907, GNorm = 0.8708, lr_0 = 2.4843e-04
Loss = 9.4314e-02, PNorm = 82.2949, GNorm = 0.9444, lr_0 = 2.4826e-04
Loss = 9.4751e-02, PNorm = 82.3017, GNorm = 0.8194, lr_0 = 2.4809e-04
Loss = 8.9392e-02, PNorm = 82.3055, GNorm = 0.5406, lr_0 = 2.4792e-04
Loss = 8.9377e-02, PNorm = 82.3124, GNorm = 0.7238, lr_0 = 2.4775e-04
Loss = 1.0228e-01, PNorm = 82.3151, GNorm = 0.7193, lr_0 = 2.4758e-04
Loss = 8.4544e-02, PNorm = 82.3144, GNorm = 0.5525, lr_0 = 2.4741e-04
Loss = 9.9137e-02, PNorm = 82.3180, GNorm = 0.6071, lr_0 = 2.4724e-04
Loss = 9.2040e-02, PNorm = 82.3206, GNorm = 0.6082, lr_0 = 2.4707e-04
Validation mae = 0.228251
Epoch 19
Loss = 8.8410e-02, PNorm = 82.3236, GNorm = 0.7038, lr_0 = 2.4690e-04
Loss = 9.2258e-02, PNorm = 82.3301, GNorm = 0.5578, lr_0 = 2.4674e-04
Loss = 9.2791e-02, PNorm = 82.3371, GNorm = 0.7068, lr_0 = 2.4657e-04
Loss = 8.3547e-02, PNorm = 82.3422, GNorm = 0.6506, lr_0 = 2.4640e-04
Loss = 9.0481e-02, PNorm = 82.3459, GNorm = 0.6027, lr_0 = 2.4623e-04
Loss = 9.3731e-02, PNorm = 82.3537, GNorm = 0.7495, lr_0 = 2.4606e-04
Loss = 9.8154e-02, PNorm = 82.3606, GNorm = 0.5470, lr_0 = 2.4589e-04
Loss = 9.1393e-02, PNorm = 82.3696, GNorm = 0.5847, lr_0 = 2.4572e-04
Loss = 9.9985e-02, PNorm = 82.3729, GNorm = 0.5818, lr_0 = 2.4556e-04
Loss = 8.7234e-02, PNorm = 82.3758, GNorm = 0.4046, lr_0 = 2.4539e-04
Loss = 8.3227e-02, PNorm = 82.3780, GNorm = 0.6761, lr_0 = 2.4522e-04
Loss = 8.4440e-02, PNorm = 82.3810, GNorm = 0.6221, lr_0 = 2.4505e-04
Loss = 8.6052e-02, PNorm = 82.3852, GNorm = 0.5637, lr_0 = 2.4488e-04
Loss = 1.0213e-01, PNorm = 82.3899, GNorm = 0.5991, lr_0 = 2.4472e-04
Loss = 8.9257e-02, PNorm = 82.3956, GNorm = 1.1059, lr_0 = 2.4455e-04
Loss = 8.5880e-02, PNorm = 82.3998, GNorm = 0.4267, lr_0 = 2.4438e-04
Loss = 9.8315e-02, PNorm = 82.4041, GNorm = 0.7057, lr_0 = 2.4421e-04
Loss = 1.0488e-01, PNorm = 82.4125, GNorm = 0.7019, lr_0 = 2.4405e-04
Loss = 8.2611e-02, PNorm = 82.4169, GNorm = 0.5611, lr_0 = 2.4388e-04
Loss = 9.6460e-02, PNorm = 82.4196, GNorm = 0.5983, lr_0 = 2.4371e-04
Loss = 1.0690e-01, PNorm = 82.4244, GNorm = 0.6984, lr_0 = 2.4354e-04
Loss = 7.9354e-02, PNorm = 82.4313, GNorm = 0.5572, lr_0 = 2.4338e-04
Loss = 1.0061e-01, PNorm = 82.4354, GNorm = 0.7042, lr_0 = 2.4321e-04
Loss = 1.0421e-01, PNorm = 82.4393, GNorm = 0.6967, lr_0 = 2.4304e-04
Loss = 9.9195e-02, PNorm = 82.4401, GNorm = 0.5605, lr_0 = 2.4288e-04
Loss = 9.1278e-02, PNorm = 82.4424, GNorm = 0.7406, lr_0 = 2.4271e-04
Loss = 1.0119e-01, PNorm = 82.4498, GNorm = 0.7861, lr_0 = 2.4254e-04
Loss = 8.6596e-02, PNorm = 82.4567, GNorm = 0.7145, lr_0 = 2.4238e-04
Loss = 9.4341e-02, PNorm = 82.4601, GNorm = 1.0790, lr_0 = 2.4221e-04
Loss = 9.4538e-02, PNorm = 82.4628, GNorm = 0.5340, lr_0 = 2.4205e-04
Loss = 8.2678e-02, PNorm = 82.4658, GNorm = 0.8920, lr_0 = 2.4188e-04
Loss = 1.0224e-01, PNorm = 82.4677, GNorm = 0.6010, lr_0 = 2.4171e-04
Loss = 8.8368e-02, PNorm = 82.4721, GNorm = 0.6405, lr_0 = 2.4155e-04
Loss = 9.5132e-02, PNorm = 82.4763, GNorm = 0.5816, lr_0 = 2.4138e-04
Loss = 8.9849e-02, PNorm = 82.4810, GNorm = 0.6448, lr_0 = 2.4122e-04
Loss = 8.9028e-02, PNorm = 82.4863, GNorm = 0.6068, lr_0 = 2.4105e-04
Loss = 9.2337e-02, PNorm = 82.4916, GNorm = 0.6443, lr_0 = 2.4089e-04
Loss = 8.2272e-02, PNorm = 82.4946, GNorm = 0.5282, lr_0 = 2.4072e-04
Loss = 1.0256e-01, PNorm = 82.4986, GNorm = 0.7143, lr_0 = 2.4056e-04
Loss = 9.5999e-02, PNorm = 82.5015, GNorm = 0.6848, lr_0 = 2.4039e-04
Loss = 8.1298e-02, PNorm = 82.5056, GNorm = 0.6998, lr_0 = 2.4023e-04
Loss = 1.1460e-01, PNorm = 82.5073, GNorm = 0.6082, lr_0 = 2.4006e-04
Loss = 1.1098e-01, PNorm = 82.5141, GNorm = 0.6790, lr_0 = 2.3990e-04
Loss = 8.5479e-02, PNorm = 82.5197, GNorm = 0.6799, lr_0 = 2.3974e-04
Loss = 1.0267e-01, PNorm = 82.5261, GNorm = 0.9517, lr_0 = 2.3957e-04
Loss = 8.5572e-02, PNorm = 82.5295, GNorm = 0.6376, lr_0 = 2.3941e-04
Loss = 9.8233e-02, PNorm = 82.5349, GNorm = 0.6449, lr_0 = 2.3924e-04
Loss = 9.4007e-02, PNorm = 82.5435, GNorm = 0.6142, lr_0 = 2.3908e-04
Loss = 8.5847e-02, PNorm = 82.5478, GNorm = 0.7722, lr_0 = 2.3892e-04
Loss = 1.0384e-01, PNorm = 82.5536, GNorm = 0.4645, lr_0 = 2.3875e-04
Loss = 9.8700e-02, PNorm = 82.5564, GNorm = 0.7500, lr_0 = 2.3859e-04
Loss = 8.8293e-02, PNorm = 82.5611, GNorm = 0.5684, lr_0 = 2.3842e-04
Loss = 9.9020e-02, PNorm = 82.5635, GNorm = 0.7411, lr_0 = 2.3826e-04
Loss = 7.4293e-02, PNorm = 82.5667, GNorm = 0.6974, lr_0 = 2.3810e-04
Loss = 1.0299e-01, PNorm = 82.5707, GNorm = 0.6422, lr_0 = 2.3794e-04
Loss = 1.0153e-01, PNorm = 82.5753, GNorm = 0.5315, lr_0 = 2.3777e-04
Loss = 9.6334e-02, PNorm = 82.5813, GNorm = 0.8049, lr_0 = 2.3761e-04
Loss = 9.8436e-02, PNorm = 82.5878, GNorm = 1.0258, lr_0 = 2.3745e-04
Loss = 9.0506e-02, PNorm = 82.5960, GNorm = 0.5585, lr_0 = 2.3728e-04
Loss = 9.9055e-02, PNorm = 82.6001, GNorm = 0.6438, lr_0 = 2.3712e-04
Loss = 9.9208e-02, PNorm = 82.6056, GNorm = 0.5388, lr_0 = 2.3696e-04
Loss = 9.1620e-02, PNorm = 82.6101, GNorm = 0.6535, lr_0 = 2.3680e-04
Loss = 1.0323e-01, PNorm = 82.6124, GNorm = 0.7999, lr_0 = 2.3663e-04
Loss = 1.0525e-01, PNorm = 82.6161, GNorm = 0.5179, lr_0 = 2.3647e-04
Loss = 8.1035e-02, PNorm = 82.6213, GNorm = 0.4947, lr_0 = 2.3631e-04
Loss = 9.3945e-02, PNorm = 82.6251, GNorm = 0.6586, lr_0 = 2.3615e-04
Loss = 8.7619e-02, PNorm = 82.6273, GNorm = 0.6928, lr_0 = 2.3599e-04
Loss = 9.7639e-02, PNorm = 82.6313, GNorm = 0.6784, lr_0 = 2.3582e-04
Loss = 8.8374e-02, PNorm = 82.6349, GNorm = 0.6369, lr_0 = 2.3566e-04
Loss = 9.8806e-02, PNorm = 82.6399, GNorm = 0.5717, lr_0 = 2.3550e-04
Loss = 1.0050e-01, PNorm = 82.6459, GNorm = 0.6497, lr_0 = 2.3534e-04
Loss = 9.8300e-02, PNorm = 82.6528, GNorm = 0.6532, lr_0 = 2.3518e-04
Loss = 8.8953e-02, PNorm = 82.6567, GNorm = 0.8363, lr_0 = 2.3502e-04
Loss = 8.9127e-02, PNorm = 82.6615, GNorm = 0.6125, lr_0 = 2.3486e-04
Loss = 9.2365e-02, PNorm = 82.6655, GNorm = 0.6154, lr_0 = 2.3470e-04
Loss = 9.2244e-02, PNorm = 82.6691, GNorm = 0.6514, lr_0 = 2.3454e-04
Loss = 9.7253e-02, PNorm = 82.6748, GNorm = 0.9408, lr_0 = 2.3437e-04
Loss = 8.8723e-02, PNorm = 82.6791, GNorm = 0.8659, lr_0 = 2.3421e-04
Loss = 8.5951e-02, PNorm = 82.6809, GNorm = 0.6780, lr_0 = 2.3405e-04
Loss = 9.2290e-02, PNorm = 82.6832, GNorm = 0.4825, lr_0 = 2.3389e-04
Loss = 8.7081e-02, PNorm = 82.6878, GNorm = 0.7687, lr_0 = 2.3373e-04
Loss = 9.0949e-02, PNorm = 82.6937, GNorm = 0.7605, lr_0 = 2.3357e-04
Loss = 9.6938e-02, PNorm = 82.6986, GNorm = 0.8508, lr_0 = 2.3341e-04
Loss = 9.9620e-02, PNorm = 82.7045, GNorm = 0.7816, lr_0 = 2.3325e-04
Loss = 8.3323e-02, PNorm = 82.7068, GNorm = 0.7164, lr_0 = 2.3309e-04
Loss = 9.0707e-02, PNorm = 82.7116, GNorm = 0.5380, lr_0 = 2.3293e-04
Loss = 1.0164e-01, PNorm = 82.7139, GNorm = 0.7779, lr_0 = 2.3277e-04
Loss = 1.0084e-01, PNorm = 82.7170, GNorm = 0.7459, lr_0 = 2.3261e-04
Loss = 1.0722e-01, PNorm = 82.7224, GNorm = 0.5786, lr_0 = 2.3246e-04
Loss = 1.0295e-01, PNorm = 82.7320, GNorm = 0.7206, lr_0 = 2.3230e-04
Loss = 9.8912e-02, PNorm = 82.7345, GNorm = 0.4849, lr_0 = 2.3214e-04
Loss = 1.0047e-01, PNorm = 82.7362, GNorm = 0.8805, lr_0 = 2.3198e-04
Loss = 1.0049e-01, PNorm = 82.7404, GNorm = 0.5786, lr_0 = 2.3182e-04
Loss = 7.8695e-02, PNorm = 82.7425, GNorm = 0.5430, lr_0 = 2.3166e-04
Loss = 1.0043e-01, PNorm = 82.7449, GNorm = 0.8320, lr_0 = 2.3150e-04
Loss = 8.4946e-02, PNorm = 82.7474, GNorm = 0.5332, lr_0 = 2.3134e-04
Loss = 8.6854e-02, PNorm = 82.7500, GNorm = 0.6828, lr_0 = 2.3118e-04
Loss = 9.4965e-02, PNorm = 82.7533, GNorm = 0.6216, lr_0 = 2.3103e-04
Loss = 1.0670e-01, PNorm = 82.7570, GNorm = 0.6543, lr_0 = 2.3087e-04
Loss = 9.8491e-02, PNorm = 82.7632, GNorm = 0.5543, lr_0 = 2.3071e-04
Loss = 9.7951e-02, PNorm = 82.7699, GNorm = 0.5629, lr_0 = 2.3055e-04
Loss = 1.1302e-01, PNorm = 82.7712, GNorm = 0.5795, lr_0 = 2.3039e-04
Loss = 9.0060e-02, PNorm = 82.7770, GNorm = 0.9272, lr_0 = 2.3024e-04
Loss = 9.1148e-02, PNorm = 82.7808, GNorm = 0.7084, lr_0 = 2.3008e-04
Loss = 8.0086e-02, PNorm = 82.7856, GNorm = 0.6594, lr_0 = 2.2992e-04
Loss = 1.0325e-01, PNorm = 82.7913, GNorm = 0.7491, lr_0 = 2.2976e-04
Loss = 1.0528e-01, PNorm = 82.7932, GNorm = 0.4762, lr_0 = 2.2961e-04
Loss = 9.2210e-02, PNorm = 82.7976, GNorm = 0.6335, lr_0 = 2.2945e-04
Loss = 1.1523e-01, PNorm = 82.8047, GNorm = 0.9291, lr_0 = 2.2929e-04
Loss = 7.8720e-02, PNorm = 82.8080, GNorm = 0.8175, lr_0 = 2.2913e-04
Loss = 1.0378e-01, PNorm = 82.8072, GNorm = 0.9383, lr_0 = 2.2898e-04
Loss = 1.0447e-01, PNorm = 82.8079, GNorm = 0.8366, lr_0 = 2.2882e-04
Loss = 9.8361e-02, PNorm = 82.8123, GNorm = 0.7095, lr_0 = 2.2866e-04
Loss = 8.4650e-02, PNorm = 82.8198, GNorm = 0.5153, lr_0 = 2.2851e-04
Loss = 9.5159e-02, PNorm = 82.8226, GNorm = 0.9612, lr_0 = 2.2835e-04
Loss = 1.0085e-01, PNorm = 82.8259, GNorm = 0.5559, lr_0 = 2.2819e-04
Loss = 8.8638e-02, PNorm = 82.8267, GNorm = 0.6131, lr_0 = 2.2804e-04
Loss = 9.4506e-02, PNorm = 82.8327, GNorm = 0.7439, lr_0 = 2.2788e-04
Loss = 9.7626e-02, PNorm = 82.8402, GNorm = 0.7603, lr_0 = 2.2773e-04
Loss = 8.4877e-02, PNorm = 82.8460, GNorm = 0.9563, lr_0 = 2.2757e-04
Validation mae = 0.231076
Epoch 20
Loss = 8.2282e-02, PNorm = 82.8480, GNorm = 0.6589, lr_0 = 2.2741e-04
Loss = 9.1638e-02, PNorm = 82.8484, GNorm = 0.6667, lr_0 = 2.2726e-04
Loss = 8.2511e-02, PNorm = 82.8551, GNorm = 0.4802, lr_0 = 2.2710e-04
Loss = 9.1059e-02, PNorm = 82.8612, GNorm = 0.7558, lr_0 = 2.2695e-04
Loss = 9.4344e-02, PNorm = 82.8676, GNorm = 0.6859, lr_0 = 2.2679e-04
Loss = 8.6008e-02, PNorm = 82.8738, GNorm = 0.6165, lr_0 = 2.2664e-04
Loss = 9.9974e-02, PNorm = 82.8765, GNorm = 0.5178, lr_0 = 2.2648e-04
Loss = 8.9125e-02, PNorm = 82.8802, GNorm = 0.8521, lr_0 = 2.2632e-04
Loss = 8.7223e-02, PNorm = 82.8851, GNorm = 0.5480, lr_0 = 2.2617e-04
Loss = 8.4937e-02, PNorm = 82.8909, GNorm = 0.6072, lr_0 = 2.2601e-04
Loss = 8.0852e-02, PNorm = 82.8940, GNorm = 0.5513, lr_0 = 2.2586e-04
Loss = 8.1009e-02, PNorm = 82.8950, GNorm = 0.5701, lr_0 = 2.2571e-04
Loss = 8.8147e-02, PNorm = 82.9016, GNorm = 0.7750, lr_0 = 2.2555e-04
Loss = 7.7773e-02, PNorm = 82.9081, GNorm = 0.6374, lr_0 = 2.2540e-04
Loss = 1.0800e-01, PNorm = 82.9111, GNorm = 0.7634, lr_0 = 2.2524e-04
Loss = 7.9473e-02, PNorm = 82.9147, GNorm = 0.7369, lr_0 = 2.2509e-04
Loss = 9.8486e-02, PNorm = 82.9202, GNorm = 0.5992, lr_0 = 2.2493e-04
Loss = 9.0727e-02, PNorm = 82.9257, GNorm = 0.7347, lr_0 = 2.2478e-04
Loss = 9.3148e-02, PNorm = 82.9292, GNorm = 0.6876, lr_0 = 2.2463e-04
Loss = 8.0954e-02, PNorm = 82.9328, GNorm = 0.5502, lr_0 = 2.2447e-04
Loss = 8.2588e-02, PNorm = 82.9378, GNorm = 0.5740, lr_0 = 2.2432e-04
Loss = 9.2473e-02, PNorm = 82.9426, GNorm = 0.7791, lr_0 = 2.2416e-04
Loss = 8.3484e-02, PNorm = 82.9473, GNorm = 0.9092, lr_0 = 2.2401e-04
Loss = 8.1167e-02, PNorm = 82.9497, GNorm = 0.6246, lr_0 = 2.2386e-04
Loss = 1.0176e-01, PNorm = 82.9519, GNorm = 0.4774, lr_0 = 2.2370e-04
Loss = 8.2515e-02, PNorm = 82.9545, GNorm = 0.6045, lr_0 = 2.2355e-04
Loss = 8.5864e-02, PNorm = 82.9572, GNorm = 0.6574, lr_0 = 2.2340e-04
Loss = 8.5806e-02, PNorm = 82.9601, GNorm = 0.6120, lr_0 = 2.2324e-04
Loss = 9.9493e-02, PNorm = 82.9637, GNorm = 0.7748, lr_0 = 2.2309e-04
Loss = 9.4165e-02, PNorm = 82.9665, GNorm = 0.7148, lr_0 = 2.2294e-04
Loss = 9.0598e-02, PNorm = 82.9724, GNorm = 0.6980, lr_0 = 2.2279e-04
Loss = 8.0557e-02, PNorm = 82.9784, GNorm = 0.7878, lr_0 = 2.2263e-04
Loss = 9.4720e-02, PNorm = 82.9832, GNorm = 0.7137, lr_0 = 2.2248e-04
Loss = 8.5781e-02, PNorm = 82.9885, GNorm = 0.6252, lr_0 = 2.2233e-04
Loss = 1.0057e-01, PNorm = 82.9941, GNorm = 0.6457, lr_0 = 2.2218e-04
Loss = 8.5339e-02, PNorm = 82.9993, GNorm = 0.6693, lr_0 = 2.2202e-04
Loss = 9.4855e-02, PNorm = 83.0048, GNorm = 0.5700, lr_0 = 2.2187e-04
Loss = 9.1783e-02, PNorm = 83.0095, GNorm = 0.7274, lr_0 = 2.2172e-04
Loss = 9.6466e-02, PNorm = 83.0121, GNorm = 0.6163, lr_0 = 2.2157e-04
Loss = 9.5488e-02, PNorm = 83.0174, GNorm = 0.5657, lr_0 = 2.2142e-04
Loss = 9.5056e-02, PNorm = 83.0242, GNorm = 0.7632, lr_0 = 2.2126e-04
Loss = 8.1373e-02, PNorm = 83.0268, GNorm = 0.5707, lr_0 = 2.2111e-04
Loss = 9.5370e-02, PNorm = 83.0310, GNorm = 0.8949, lr_0 = 2.2096e-04
Loss = 9.4108e-02, PNorm = 83.0358, GNorm = 0.7353, lr_0 = 2.2081e-04
Loss = 9.0703e-02, PNorm = 83.0372, GNorm = 0.6842, lr_0 = 2.2066e-04
Loss = 8.8461e-02, PNorm = 83.0409, GNorm = 0.6928, lr_0 = 2.2051e-04
Loss = 8.7576e-02, PNorm = 83.0446, GNorm = 0.7290, lr_0 = 2.2036e-04
Loss = 8.4847e-02, PNorm = 83.0488, GNorm = 0.6104, lr_0 = 2.2021e-04
Loss = 9.9546e-02, PNorm = 83.0488, GNorm = 0.5377, lr_0 = 2.2005e-04
Loss = 8.8464e-02, PNorm = 83.0503, GNorm = 0.5544, lr_0 = 2.1990e-04
Loss = 9.1860e-02, PNorm = 83.0548, GNorm = 0.6737, lr_0 = 2.1975e-04
Loss = 9.4289e-02, PNorm = 83.0592, GNorm = 0.4665, lr_0 = 2.1960e-04
Loss = 8.8403e-02, PNorm = 83.0662, GNorm = 0.5011, lr_0 = 2.1945e-04
Loss = 8.6703e-02, PNorm = 83.0715, GNorm = 0.5826, lr_0 = 2.1930e-04
Loss = 8.4787e-02, PNorm = 83.0753, GNorm = 0.6671, lr_0 = 2.1915e-04
Loss = 1.0527e-01, PNorm = 83.0786, GNorm = 0.6017, lr_0 = 2.1900e-04
Loss = 8.3977e-02, PNorm = 83.0836, GNorm = 0.5708, lr_0 = 2.1885e-04
Loss = 8.9191e-02, PNorm = 83.0895, GNorm = 0.5892, lr_0 = 2.1870e-04
Loss = 9.2822e-02, PNorm = 83.0917, GNorm = 0.5784, lr_0 = 2.1855e-04
Loss = 9.1564e-02, PNorm = 83.0953, GNorm = 0.6996, lr_0 = 2.1840e-04
Loss = 9.9860e-02, PNorm = 83.1038, GNorm = 0.6533, lr_0 = 2.1825e-04
Loss = 9.1038e-02, PNorm = 83.1072, GNorm = 0.6694, lr_0 = 2.1810e-04
Loss = 8.9490e-02, PNorm = 83.1100, GNorm = 0.5513, lr_0 = 2.1795e-04
Loss = 1.0943e-01, PNorm = 83.1166, GNorm = 0.6869, lr_0 = 2.1780e-04
Loss = 9.2009e-02, PNorm = 83.1203, GNorm = 0.5012, lr_0 = 2.1765e-04
Loss = 9.9619e-02, PNorm = 83.1239, GNorm = 0.4625, lr_0 = 2.1751e-04
Loss = 9.2356e-02, PNorm = 83.1253, GNorm = 0.6308, lr_0 = 2.1736e-04
Loss = 9.1389e-02, PNorm = 83.1290, GNorm = 0.7915, lr_0 = 2.1721e-04
Loss = 8.5077e-02, PNorm = 83.1350, GNorm = 0.6284, lr_0 = 2.1706e-04
Loss = 8.6568e-02, PNorm = 83.1390, GNorm = 0.6896, lr_0 = 2.1691e-04
Loss = 9.1353e-02, PNorm = 83.1456, GNorm = 0.8462, lr_0 = 2.1676e-04
Loss = 8.5325e-02, PNorm = 83.1507, GNorm = 0.6500, lr_0 = 2.1661e-04
Loss = 1.1054e-01, PNorm = 83.1526, GNorm = 0.8343, lr_0 = 2.1646e-04
Loss = 9.2375e-02, PNorm = 83.1565, GNorm = 0.6649, lr_0 = 2.1632e-04
Loss = 9.6675e-02, PNorm = 83.1581, GNorm = 0.6252, lr_0 = 2.1617e-04
Loss = 7.7462e-02, PNorm = 83.1581, GNorm = 0.7472, lr_0 = 2.1602e-04
Loss = 9.2983e-02, PNorm = 83.1612, GNorm = 0.8071, lr_0 = 2.1587e-04
Loss = 1.0935e-01, PNorm = 83.1674, GNorm = 0.5676, lr_0 = 2.1572e-04
Loss = 8.9308e-02, PNorm = 83.1744, GNorm = 0.7812, lr_0 = 2.1558e-04
Loss = 1.0254e-01, PNorm = 83.1783, GNorm = 0.6910, lr_0 = 2.1543e-04
Loss = 1.0224e-01, PNorm = 83.1834, GNorm = 0.6773, lr_0 = 2.1528e-04
Loss = 1.0079e-01, PNorm = 83.1875, GNorm = 0.5807, lr_0 = 2.1513e-04
Loss = 1.0372e-01, PNorm = 83.1913, GNorm = 0.5061, lr_0 = 2.1499e-04
Loss = 8.7662e-02, PNorm = 83.1969, GNorm = 0.7312, lr_0 = 2.1484e-04
Loss = 1.0495e-01, PNorm = 83.2007, GNorm = 0.7098, lr_0 = 2.1469e-04
Loss = 9.1107e-02, PNorm = 83.2042, GNorm = 0.5723, lr_0 = 2.1454e-04
Loss = 8.8456e-02, PNorm = 83.2076, GNorm = 0.6438, lr_0 = 2.1440e-04
Loss = 8.8825e-02, PNorm = 83.2128, GNorm = 0.8887, lr_0 = 2.1425e-04
Loss = 1.0971e-01, PNorm = 83.2193, GNorm = 0.6926, lr_0 = 2.1410e-04
Loss = 9.4841e-02, PNorm = 83.2247, GNorm = 0.6503, lr_0 = 2.1396e-04
Loss = 8.9386e-02, PNorm = 83.2313, GNorm = 0.6542, lr_0 = 2.1381e-04
Loss = 8.6378e-02, PNorm = 83.2360, GNorm = 0.7428, lr_0 = 2.1366e-04
Loss = 8.4667e-02, PNorm = 83.2383, GNorm = 0.5441, lr_0 = 2.1352e-04
Loss = 9.6635e-02, PNorm = 83.2421, GNorm = 0.7452, lr_0 = 2.1337e-04
Loss = 8.3311e-02, PNorm = 83.2463, GNorm = 0.7087, lr_0 = 2.1323e-04
Loss = 8.7452e-02, PNorm = 83.2483, GNorm = 0.5802, lr_0 = 2.1308e-04
Loss = 1.0612e-01, PNorm = 83.2525, GNorm = 0.8640, lr_0 = 2.1293e-04
Loss = 8.9811e-02, PNorm = 83.2586, GNorm = 0.6868, lr_0 = 2.1279e-04
Loss = 8.9135e-02, PNorm = 83.2599, GNorm = 0.4994, lr_0 = 2.1264e-04
Loss = 9.9196e-02, PNorm = 83.2620, GNorm = 0.8628, lr_0 = 2.1250e-04
Loss = 9.3910e-02, PNorm = 83.2643, GNorm = 0.9435, lr_0 = 2.1235e-04
Loss = 9.1400e-02, PNorm = 83.2669, GNorm = 0.5966, lr_0 = 2.1221e-04
Loss = 9.1712e-02, PNorm = 83.2724, GNorm = 0.6434, lr_0 = 2.1206e-04
Loss = 9.1170e-02, PNorm = 83.2742, GNorm = 0.8370, lr_0 = 2.1191e-04
Loss = 7.7598e-02, PNorm = 83.2785, GNorm = 0.4241, lr_0 = 2.1177e-04
Loss = 9.6325e-02, PNorm = 83.2862, GNorm = 0.5729, lr_0 = 2.1162e-04
Loss = 9.1631e-02, PNorm = 83.2892, GNorm = 0.9971, lr_0 = 2.1148e-04
Loss = 8.3422e-02, PNorm = 83.2928, GNorm = 0.7310, lr_0 = 2.1133e-04
Loss = 9.1423e-02, PNorm = 83.2977, GNorm = 0.6037, lr_0 = 2.1119e-04
Loss = 8.9009e-02, PNorm = 83.3001, GNorm = 0.6424, lr_0 = 2.1104e-04
Loss = 9.2700e-02, PNorm = 83.3027, GNorm = 0.6128, lr_0 = 2.1090e-04
Loss = 9.2051e-02, PNorm = 83.3065, GNorm = 0.6872, lr_0 = 2.1076e-04
Loss = 9.6490e-02, PNorm = 83.3096, GNorm = 0.6317, lr_0 = 2.1061e-04
Loss = 7.5571e-02, PNorm = 83.3131, GNorm = 0.5351, lr_0 = 2.1047e-04
Loss = 8.7711e-02, PNorm = 83.3193, GNorm = 0.6703, lr_0 = 2.1032e-04
Loss = 9.1252e-02, PNorm = 83.3242, GNorm = 0.5009, lr_0 = 2.1018e-04
Loss = 8.9966e-02, PNorm = 83.3278, GNorm = 0.6041, lr_0 = 2.1003e-04
Loss = 1.0505e-01, PNorm = 83.3320, GNorm = 0.4715, lr_0 = 2.0989e-04
Loss = 8.4506e-02, PNorm = 83.3353, GNorm = 0.7166, lr_0 = 2.0975e-04
Loss = 9.4697e-02, PNorm = 83.3359, GNorm = 0.7456, lr_0 = 2.0960e-04
Validation mae = 0.227885
Epoch 21
Loss = 8.0733e-02, PNorm = 83.3381, GNorm = 0.8322, lr_0 = 2.0946e-04
Loss = 9.0102e-02, PNorm = 83.3427, GNorm = 0.6629, lr_0 = 2.0932e-04
Loss = 8.1781e-02, PNorm = 83.3438, GNorm = 0.6303, lr_0 = 2.0917e-04
Loss = 9.3012e-02, PNorm = 83.3455, GNorm = 0.5482, lr_0 = 2.0903e-04
Loss = 7.6315e-02, PNorm = 83.3498, GNorm = 0.7181, lr_0 = 2.0889e-04
Loss = 9.2064e-02, PNorm = 83.3521, GNorm = 0.5720, lr_0 = 2.0874e-04
Loss = 8.5426e-02, PNorm = 83.3562, GNorm = 0.6025, lr_0 = 2.0860e-04
Loss = 8.5770e-02, PNorm = 83.3606, GNorm = 0.6343, lr_0 = 2.0846e-04
Loss = 8.7744e-02, PNorm = 83.3659, GNorm = 0.6244, lr_0 = 2.0831e-04
Loss = 9.1383e-02, PNorm = 83.3699, GNorm = 0.6811, lr_0 = 2.0817e-04
Loss = 9.1708e-02, PNorm = 83.3720, GNorm = 0.6373, lr_0 = 2.0803e-04
Loss = 9.6493e-02, PNorm = 83.3709, GNorm = 0.6357, lr_0 = 2.0789e-04
Loss = 8.8932e-02, PNorm = 83.3735, GNorm = 0.6594, lr_0 = 2.0774e-04
Loss = 7.7658e-02, PNorm = 83.3758, GNorm = 0.4411, lr_0 = 2.0760e-04
Loss = 9.0255e-02, PNorm = 83.3803, GNorm = 0.4845, lr_0 = 2.0746e-04
Loss = 9.5767e-02, PNorm = 83.3858, GNorm = 0.6538, lr_0 = 2.0732e-04
Loss = 1.0019e-01, PNorm = 83.3913, GNorm = 0.7844, lr_0 = 2.0718e-04
Loss = 7.7794e-02, PNorm = 83.3965, GNorm = 0.5107, lr_0 = 2.0703e-04
Loss = 8.8570e-02, PNorm = 83.3979, GNorm = 0.7867, lr_0 = 2.0689e-04
Loss = 8.6089e-02, PNorm = 83.3982, GNorm = 0.6940, lr_0 = 2.0675e-04
Loss = 8.8519e-02, PNorm = 83.4022, GNorm = 0.4680, lr_0 = 2.0661e-04
Loss = 9.3257e-02, PNorm = 83.4075, GNorm = 0.6638, lr_0 = 2.0647e-04
Loss = 9.7526e-02, PNorm = 83.4128, GNorm = 0.6178, lr_0 = 2.0633e-04
Loss = 8.3023e-02, PNorm = 83.4167, GNorm = 0.6242, lr_0 = 2.0618e-04
Loss = 8.8116e-02, PNorm = 83.4208, GNorm = 0.7052, lr_0 = 2.0604e-04
Loss = 9.3194e-02, PNorm = 83.4239, GNorm = 0.5573, lr_0 = 2.0590e-04
Loss = 9.9212e-02, PNorm = 83.4302, GNorm = 0.6703, lr_0 = 2.0576e-04
Loss = 8.6158e-02, PNorm = 83.4355, GNorm = 0.6770, lr_0 = 2.0562e-04
Loss = 8.5535e-02, PNorm = 83.4394, GNorm = 0.6390, lr_0 = 2.0548e-04
Loss = 9.2705e-02, PNorm = 83.4418, GNorm = 0.7645, lr_0 = 2.0534e-04
Loss = 9.0190e-02, PNorm = 83.4441, GNorm = 0.3977, lr_0 = 2.0520e-04
Loss = 9.1508e-02, PNorm = 83.4452, GNorm = 0.3863, lr_0 = 2.0506e-04
Loss = 8.3998e-02, PNorm = 83.4484, GNorm = 0.4951, lr_0 = 2.0492e-04
Loss = 9.2599e-02, PNorm = 83.4489, GNorm = 1.1233, lr_0 = 2.0478e-04
Loss = 9.1938e-02, PNorm = 83.4527, GNorm = 0.5423, lr_0 = 2.0464e-04
Loss = 9.1034e-02, PNorm = 83.4575, GNorm = 0.6277, lr_0 = 2.0450e-04
Loss = 8.4859e-02, PNorm = 83.4601, GNorm = 0.6936, lr_0 = 2.0436e-04
Loss = 9.8047e-02, PNorm = 83.4637, GNorm = 0.8118, lr_0 = 2.0422e-04
Loss = 8.8288e-02, PNorm = 83.4690, GNorm = 0.8309, lr_0 = 2.0408e-04
Loss = 8.2500e-02, PNorm = 83.4742, GNorm = 0.7145, lr_0 = 2.0394e-04
Loss = 9.6576e-02, PNorm = 83.4792, GNorm = 0.6525, lr_0 = 2.0380e-04
Loss = 9.7685e-02, PNorm = 83.4847, GNorm = 0.9337, lr_0 = 2.0366e-04
Loss = 7.9291e-02, PNorm = 83.4883, GNorm = 0.7833, lr_0 = 2.0352e-04
Loss = 8.9000e-02, PNorm = 83.4938, GNorm = 0.8185, lr_0 = 2.0338e-04
Loss = 7.9613e-02, PNorm = 83.4959, GNorm = 0.6950, lr_0 = 2.0324e-04
Loss = 8.2156e-02, PNorm = 83.4983, GNorm = 0.5527, lr_0 = 2.0310e-04
Loss = 8.0785e-02, PNorm = 83.5014, GNorm = 0.8794, lr_0 = 2.0296e-04
Loss = 8.3675e-02, PNorm = 83.5054, GNorm = 0.6514, lr_0 = 2.0282e-04
Loss = 9.6632e-02, PNorm = 83.5072, GNorm = 0.5868, lr_0 = 2.0268e-04
Loss = 9.3389e-02, PNorm = 83.5108, GNorm = 0.6655, lr_0 = 2.0254e-04
Loss = 8.2396e-02, PNorm = 83.5140, GNorm = 0.4885, lr_0 = 2.0240e-04
Loss = 8.9400e-02, PNorm = 83.5192, GNorm = 0.6494, lr_0 = 2.0227e-04
Loss = 7.8151e-02, PNorm = 83.5228, GNorm = 0.5868, lr_0 = 2.0213e-04
Loss = 8.4354e-02, PNorm = 83.5265, GNorm = 0.4639, lr_0 = 2.0199e-04
Loss = 8.4278e-02, PNorm = 83.5320, GNorm = 0.5047, lr_0 = 2.0185e-04
Loss = 8.6859e-02, PNorm = 83.5367, GNorm = 0.5697, lr_0 = 2.0171e-04
Loss = 9.1406e-02, PNorm = 83.5398, GNorm = 0.6024, lr_0 = 2.0157e-04
Loss = 9.2714e-02, PNorm = 83.5436, GNorm = 0.4496, lr_0 = 2.0144e-04
Loss = 8.6337e-02, PNorm = 83.5481, GNorm = 0.8896, lr_0 = 2.0130e-04
Loss = 9.3927e-02, PNorm = 83.5523, GNorm = 0.5929, lr_0 = 2.0116e-04
Loss = 8.6595e-02, PNorm = 83.5536, GNorm = 0.8361, lr_0 = 2.0102e-04
Loss = 8.5961e-02, PNorm = 83.5560, GNorm = 0.7852, lr_0 = 2.0088e-04
Loss = 9.4230e-02, PNorm = 83.5609, GNorm = 0.5000, lr_0 = 2.0075e-04
Loss = 8.0553e-02, PNorm = 83.5657, GNorm = 0.6318, lr_0 = 2.0061e-04
Loss = 8.0987e-02, PNorm = 83.5687, GNorm = 0.5683, lr_0 = 2.0047e-04
Loss = 7.9070e-02, PNorm = 83.5715, GNorm = 0.5876, lr_0 = 2.0033e-04
Loss = 9.2622e-02, PNorm = 83.5773, GNorm = 0.8852, lr_0 = 2.0020e-04
Loss = 9.2839e-02, PNorm = 83.5799, GNorm = 0.8523, lr_0 = 2.0006e-04
Loss = 9.3942e-02, PNorm = 83.5817, GNorm = 0.6026, lr_0 = 1.9992e-04
Loss = 8.8648e-02, PNorm = 83.5831, GNorm = 0.7248, lr_0 = 1.9979e-04
Loss = 8.7431e-02, PNorm = 83.5852, GNorm = 0.4856, lr_0 = 1.9965e-04
Loss = 8.7710e-02, PNorm = 83.5867, GNorm = 0.6220, lr_0 = 1.9951e-04
Loss = 9.4092e-02, PNorm = 83.5943, GNorm = 0.6330, lr_0 = 1.9938e-04
Loss = 8.4300e-02, PNorm = 83.6004, GNorm = 0.6463, lr_0 = 1.9924e-04
Loss = 9.8124e-02, PNorm = 83.6045, GNorm = 0.5243, lr_0 = 1.9910e-04
Loss = 9.4591e-02, PNorm = 83.6080, GNorm = 0.7294, lr_0 = 1.9897e-04
Loss = 9.0847e-02, PNorm = 83.6123, GNorm = 0.6745, lr_0 = 1.9883e-04
Loss = 1.0234e-01, PNorm = 83.6147, GNorm = 0.5496, lr_0 = 1.9869e-04
Loss = 1.0067e-01, PNorm = 83.6192, GNorm = 0.7122, lr_0 = 1.9856e-04
Loss = 9.5645e-02, PNorm = 83.6221, GNorm = 0.5734, lr_0 = 1.9842e-04
Loss = 8.8234e-02, PNorm = 83.6243, GNorm = 0.6449, lr_0 = 1.9829e-04
Loss = 8.2208e-02, PNorm = 83.6242, GNorm = 0.7361, lr_0 = 1.9815e-04
Loss = 9.6339e-02, PNorm = 83.6266, GNorm = 0.7335, lr_0 = 1.9801e-04
Loss = 8.8215e-02, PNorm = 83.6313, GNorm = 0.9968, lr_0 = 1.9788e-04
Loss = 7.9844e-02, PNorm = 83.6351, GNorm = 0.8535, lr_0 = 1.9774e-04
Loss = 9.1977e-02, PNorm = 83.6403, GNorm = 0.6299, lr_0 = 1.9761e-04
Loss = 9.5902e-02, PNorm = 83.6444, GNorm = 1.0142, lr_0 = 1.9747e-04
Loss = 8.5665e-02, PNorm = 83.6464, GNorm = 0.6713, lr_0 = 1.9734e-04
Loss = 9.1115e-02, PNorm = 83.6482, GNorm = 0.5490, lr_0 = 1.9720e-04
Loss = 6.9645e-02, PNorm = 83.6505, GNorm = 0.8307, lr_0 = 1.9707e-04
Loss = 9.6169e-02, PNorm = 83.6535, GNorm = 0.6516, lr_0 = 1.9693e-04
Loss = 8.3012e-02, PNorm = 83.6553, GNorm = 0.6645, lr_0 = 1.9680e-04
Loss = 8.7299e-02, PNorm = 83.6580, GNorm = 0.5403, lr_0 = 1.9666e-04
Loss = 8.8274e-02, PNorm = 83.6620, GNorm = 0.5156, lr_0 = 1.9653e-04
Loss = 7.9813e-02, PNorm = 83.6665, GNorm = 0.5378, lr_0 = 1.9639e-04
Loss = 1.0468e-01, PNorm = 83.6712, GNorm = 0.9448, lr_0 = 1.9626e-04
Loss = 8.7317e-02, PNorm = 83.6730, GNorm = 0.5531, lr_0 = 1.9612e-04
Loss = 9.2287e-02, PNorm = 83.6737, GNorm = 0.4156, lr_0 = 1.9599e-04
Loss = 9.6132e-02, PNorm = 83.6783, GNorm = 0.9344, lr_0 = 1.9585e-04
Loss = 9.5843e-02, PNorm = 83.6827, GNorm = 0.7544, lr_0 = 1.9572e-04
Loss = 9.0403e-02, PNorm = 83.6866, GNorm = 0.7351, lr_0 = 1.9559e-04
Loss = 8.0338e-02, PNorm = 83.6881, GNorm = 0.5164, lr_0 = 1.9545e-04
Loss = 1.0265e-01, PNorm = 83.6913, GNorm = 0.6499, lr_0 = 1.9532e-04
Loss = 8.8035e-02, PNorm = 83.6952, GNorm = 0.5513, lr_0 = 1.9518e-04
Loss = 9.2275e-02, PNorm = 83.7008, GNorm = 0.6729, lr_0 = 1.9505e-04
Loss = 8.5160e-02, PNorm = 83.7065, GNorm = 0.5966, lr_0 = 1.9492e-04
Loss = 9.2304e-02, PNorm = 83.7097, GNorm = 0.6036, lr_0 = 1.9478e-04
Loss = 9.7503e-02, PNorm = 83.7115, GNorm = 0.6542, lr_0 = 1.9465e-04
Loss = 8.3202e-02, PNorm = 83.7150, GNorm = 0.6674, lr_0 = 1.9452e-04
Loss = 1.0009e-01, PNorm = 83.7182, GNorm = 0.5889, lr_0 = 1.9438e-04
Loss = 9.0703e-02, PNorm = 83.7207, GNorm = 0.6923, lr_0 = 1.9425e-04
Loss = 9.7495e-02, PNorm = 83.7234, GNorm = 0.7025, lr_0 = 1.9412e-04
Loss = 8.7160e-02, PNorm = 83.7247, GNorm = 0.6492, lr_0 = 1.9398e-04
Loss = 7.6405e-02, PNorm = 83.7263, GNorm = 0.5638, lr_0 = 1.9385e-04
Loss = 8.5201e-02, PNorm = 83.7297, GNorm = 0.4981, lr_0 = 1.9372e-04
Loss = 9.1392e-02, PNorm = 83.7333, GNorm = 0.6922, lr_0 = 1.9359e-04
Loss = 8.9463e-02, PNorm = 83.7354, GNorm = 0.8354, lr_0 = 1.9345e-04
Loss = 8.3694e-02, PNorm = 83.7359, GNorm = 0.6416, lr_0 = 1.9332e-04
Loss = 8.4718e-02, PNorm = 83.7389, GNorm = 0.5300, lr_0 = 1.9319e-04
Loss = 9.6400e-02, PNorm = 83.7412, GNorm = 0.8379, lr_0 = 1.9306e-04
Validation mae = 0.228159
Epoch 22
Loss = 7.5211e-02, PNorm = 83.7451, GNorm = 0.7526, lr_0 = 1.9292e-04
Loss = 7.5340e-02, PNorm = 83.7491, GNorm = 0.7230, lr_0 = 1.9279e-04
Loss = 8.6657e-02, PNorm = 83.7491, GNorm = 0.4519, lr_0 = 1.9266e-04
Loss = 7.6909e-02, PNorm = 83.7505, GNorm = 0.5675, lr_0 = 1.9253e-04
Loss = 7.9340e-02, PNorm = 83.7537, GNorm = 0.5650, lr_0 = 1.9240e-04
Loss = 9.3523e-02, PNorm = 83.7572, GNorm = 0.8126, lr_0 = 1.9226e-04
Loss = 8.4607e-02, PNorm = 83.7603, GNorm = 0.8570, lr_0 = 1.9213e-04
Loss = 8.3058e-02, PNorm = 83.7625, GNorm = 0.4532, lr_0 = 1.9200e-04
Loss = 8.6586e-02, PNorm = 83.7676, GNorm = 0.5784, lr_0 = 1.9187e-04
Loss = 8.7052e-02, PNorm = 83.7708, GNorm = 0.6919, lr_0 = 1.9174e-04
Loss = 8.9795e-02, PNorm = 83.7740, GNorm = 0.8178, lr_0 = 1.9161e-04
Loss = 8.3226e-02, PNorm = 83.7785, GNorm = 0.6630, lr_0 = 1.9148e-04
Loss = 7.9083e-02, PNorm = 83.7833, GNorm = 0.8087, lr_0 = 1.9134e-04
Loss = 8.3477e-02, PNorm = 83.7860, GNorm = 0.6826, lr_0 = 1.9121e-04
Loss = 8.7358e-02, PNorm = 83.7904, GNorm = 0.6324, lr_0 = 1.9108e-04
Loss = 9.5589e-02, PNorm = 83.7924, GNorm = 0.7552, lr_0 = 1.9095e-04
Loss = 8.2090e-02, PNorm = 83.7944, GNorm = 0.6890, lr_0 = 1.9082e-04
Loss = 8.6583e-02, PNorm = 83.7980, GNorm = 0.6925, lr_0 = 1.9069e-04
Loss = 8.5104e-02, PNorm = 83.8032, GNorm = 0.7278, lr_0 = 1.9056e-04
Loss = 9.3503e-02, PNorm = 83.8074, GNorm = 0.9497, lr_0 = 1.9043e-04
Loss = 7.5177e-02, PNorm = 83.8107, GNorm = 0.6334, lr_0 = 1.9030e-04
Loss = 7.6634e-02, PNorm = 83.8131, GNorm = 0.6024, lr_0 = 1.9017e-04
Loss = 8.5643e-02, PNorm = 83.8139, GNorm = 0.7087, lr_0 = 1.9004e-04
Loss = 8.7981e-02, PNorm = 83.8167, GNorm = 0.5898, lr_0 = 1.8991e-04
Loss = 8.0715e-02, PNorm = 83.8220, GNorm = 0.6845, lr_0 = 1.8978e-04
Loss = 8.4456e-02, PNorm = 83.8254, GNorm = 0.6507, lr_0 = 1.8965e-04
Loss = 8.3526e-02, PNorm = 83.8304, GNorm = 0.6923, lr_0 = 1.8952e-04
Loss = 9.5469e-02, PNorm = 83.8351, GNorm = 0.6129, lr_0 = 1.8939e-04
Loss = 9.0175e-02, PNorm = 83.8370, GNorm = 0.5813, lr_0 = 1.8926e-04
Loss = 7.7282e-02, PNorm = 83.8391, GNorm = 0.6293, lr_0 = 1.8913e-04
Loss = 8.7295e-02, PNorm = 83.8393, GNorm = 0.5241, lr_0 = 1.8900e-04
Loss = 8.3766e-02, PNorm = 83.8421, GNorm = 0.6478, lr_0 = 1.8887e-04
Loss = 8.4325e-02, PNorm = 83.8458, GNorm = 0.6277, lr_0 = 1.8874e-04
Loss = 7.9888e-02, PNorm = 83.8513, GNorm = 0.6909, lr_0 = 1.8861e-04
Loss = 7.8604e-02, PNorm = 83.8539, GNorm = 0.5336, lr_0 = 1.8848e-04
Loss = 7.6062e-02, PNorm = 83.8557, GNorm = 0.5930, lr_0 = 1.8835e-04
Loss = 9.2480e-02, PNorm = 83.8587, GNorm = 0.6945, lr_0 = 1.8822e-04
Loss = 7.6344e-02, PNorm = 83.8578, GNorm = 0.4423, lr_0 = 1.8809e-04
Loss = 8.0328e-02, PNorm = 83.8595, GNorm = 0.6575, lr_0 = 1.8797e-04
Loss = 8.3785e-02, PNorm = 83.8615, GNorm = 0.9679, lr_0 = 1.8784e-04
Loss = 8.2388e-02, PNorm = 83.8635, GNorm = 0.5064, lr_0 = 1.8771e-04
Loss = 8.6720e-02, PNorm = 83.8677, GNorm = 0.5068, lr_0 = 1.8758e-04
Loss = 8.6783e-02, PNorm = 83.8711, GNorm = 0.5441, lr_0 = 1.8745e-04
Loss = 7.5019e-02, PNorm = 83.8737, GNorm = 0.6506, lr_0 = 1.8732e-04
Loss = 7.8193e-02, PNorm = 83.8757, GNorm = 0.7211, lr_0 = 1.8719e-04
Loss = 8.9089e-02, PNorm = 83.8788, GNorm = 0.6352, lr_0 = 1.8707e-04
Loss = 8.7613e-02, PNorm = 83.8807, GNorm = 0.9542, lr_0 = 1.8694e-04
Loss = 8.6485e-02, PNorm = 83.8853, GNorm = 0.6687, lr_0 = 1.8681e-04
Loss = 8.9429e-02, PNorm = 83.8888, GNorm = 0.6085, lr_0 = 1.8668e-04
Loss = 8.0625e-02, PNorm = 83.8943, GNorm = 0.8368, lr_0 = 1.8655e-04
Loss = 8.7041e-02, PNorm = 83.8995, GNorm = 0.6226, lr_0 = 1.8643e-04
Loss = 7.2540e-02, PNorm = 83.9010, GNorm = 0.5299, lr_0 = 1.8630e-04
Loss = 9.1853e-02, PNorm = 83.9028, GNorm = 0.7126, lr_0 = 1.8617e-04
Loss = 8.9585e-02, PNorm = 83.9059, GNorm = 0.5039, lr_0 = 1.8604e-04
Loss = 9.3038e-02, PNorm = 83.9104, GNorm = 0.8638, lr_0 = 1.8592e-04
Loss = 7.8978e-02, PNorm = 83.9127, GNorm = 0.7662, lr_0 = 1.8579e-04
Loss = 7.8922e-02, PNorm = 83.9156, GNorm = 0.6790, lr_0 = 1.8566e-04
Loss = 9.1542e-02, PNorm = 83.9184, GNorm = 0.5738, lr_0 = 1.8553e-04
Loss = 7.5972e-02, PNorm = 83.9199, GNorm = 0.6360, lr_0 = 1.8541e-04
Loss = 8.9807e-02, PNorm = 83.9207, GNorm = 0.6941, lr_0 = 1.8528e-04
Loss = 8.1895e-02, PNorm = 83.9246, GNorm = 0.6389, lr_0 = 1.8515e-04
Loss = 1.0712e-01, PNorm = 83.9282, GNorm = 0.6511, lr_0 = 1.8503e-04
Loss = 8.7311e-02, PNorm = 83.9315, GNorm = 0.6835, lr_0 = 1.8490e-04
Loss = 8.8893e-02, PNorm = 83.9349, GNorm = 0.4704, lr_0 = 1.8477e-04
Loss = 9.1660e-02, PNorm = 83.9392, GNorm = 0.9287, lr_0 = 1.8465e-04
Loss = 8.9972e-02, PNorm = 83.9434, GNorm = 0.7609, lr_0 = 1.8452e-04
Loss = 8.7826e-02, PNorm = 83.9454, GNorm = 0.6913, lr_0 = 1.8439e-04
Loss = 8.4260e-02, PNorm = 83.9492, GNorm = 0.5902, lr_0 = 1.8427e-04
Loss = 8.4796e-02, PNorm = 83.9523, GNorm = 0.6923, lr_0 = 1.8414e-04
Loss = 8.5306e-02, PNorm = 83.9551, GNorm = 0.6685, lr_0 = 1.8401e-04
Loss = 8.2748e-02, PNorm = 83.9597, GNorm = 0.5803, lr_0 = 1.8389e-04
Loss = 8.2525e-02, PNorm = 83.9611, GNorm = 0.6589, lr_0 = 1.8376e-04
Loss = 8.3508e-02, PNorm = 83.9641, GNorm = 0.7334, lr_0 = 1.8364e-04
Loss = 9.0088e-02, PNorm = 83.9660, GNorm = 1.4149, lr_0 = 1.8351e-04
Loss = 7.7762e-02, PNorm = 83.9668, GNorm = 0.7386, lr_0 = 1.8338e-04
Loss = 8.9975e-02, PNorm = 83.9699, GNorm = 0.8611, lr_0 = 1.8326e-04
Loss = 9.2137e-02, PNorm = 83.9727, GNorm = 0.6523, lr_0 = 1.8313e-04
Loss = 8.5537e-02, PNorm = 83.9764, GNorm = 0.6638, lr_0 = 1.8301e-04
Loss = 9.3653e-02, PNorm = 83.9800, GNorm = 0.6283, lr_0 = 1.8288e-04
Loss = 9.1054e-02, PNorm = 83.9835, GNorm = 0.7367, lr_0 = 1.8276e-04
Loss = 8.0983e-02, PNorm = 83.9855, GNorm = 0.5299, lr_0 = 1.8263e-04
Loss = 8.7292e-02, PNorm = 83.9893, GNorm = 0.7146, lr_0 = 1.8251e-04
Loss = 9.5091e-02, PNorm = 83.9942, GNorm = 0.7529, lr_0 = 1.8238e-04
Loss = 8.9273e-02, PNorm = 83.9986, GNorm = 0.7303, lr_0 = 1.8226e-04
Loss = 8.1040e-02, PNorm = 84.0031, GNorm = 0.8228, lr_0 = 1.8213e-04
Loss = 8.7617e-02, PNorm = 84.0063, GNorm = 0.5142, lr_0 = 1.8201e-04
Loss = 9.1441e-02, PNorm = 84.0086, GNorm = 0.5361, lr_0 = 1.8188e-04
Loss = 8.9559e-02, PNorm = 84.0111, GNorm = 0.7180, lr_0 = 1.8176e-04
Loss = 9.5958e-02, PNorm = 84.0136, GNorm = 0.6946, lr_0 = 1.8163e-04
Loss = 9.4010e-02, PNorm = 84.0168, GNorm = 0.7994, lr_0 = 1.8151e-04
Loss = 8.4464e-02, PNorm = 84.0208, GNorm = 0.5735, lr_0 = 1.8138e-04
Loss = 9.3872e-02, PNorm = 84.0235, GNorm = 0.7162, lr_0 = 1.8126e-04
Loss = 8.2866e-02, PNorm = 84.0219, GNorm = 0.5793, lr_0 = 1.8114e-04
Loss = 9.6859e-02, PNorm = 84.0241, GNorm = 0.6111, lr_0 = 1.8101e-04
Loss = 8.9346e-02, PNorm = 84.0251, GNorm = 0.6718, lr_0 = 1.8089e-04
Loss = 8.3965e-02, PNorm = 84.0272, GNorm = 0.5401, lr_0 = 1.8076e-04
Loss = 9.3110e-02, PNorm = 84.0295, GNorm = 0.7339, lr_0 = 1.8064e-04
Loss = 9.1953e-02, PNorm = 84.0322, GNorm = 0.7773, lr_0 = 1.8052e-04
Loss = 8.8481e-02, PNorm = 84.0325, GNorm = 0.7165, lr_0 = 1.8039e-04
Loss = 8.0601e-02, PNorm = 84.0363, GNorm = 0.7302, lr_0 = 1.8027e-04
Loss = 7.8851e-02, PNorm = 84.0387, GNorm = 0.6673, lr_0 = 1.8015e-04
Loss = 8.1916e-02, PNorm = 84.0444, GNorm = 0.6426, lr_0 = 1.8002e-04
Loss = 8.4858e-02, PNorm = 84.0494, GNorm = 0.5932, lr_0 = 1.7990e-04
Loss = 8.6174e-02, PNorm = 84.0518, GNorm = 0.5502, lr_0 = 1.7978e-04
Loss = 8.8484e-02, PNorm = 84.0530, GNorm = 0.5625, lr_0 = 1.7965e-04
Loss = 8.5611e-02, PNorm = 84.0549, GNorm = 0.7373, lr_0 = 1.7953e-04
Loss = 8.1150e-02, PNorm = 84.0573, GNorm = 0.6948, lr_0 = 1.7941e-04
Loss = 9.7777e-02, PNorm = 84.0617, GNorm = 0.7135, lr_0 = 1.7928e-04
Loss = 8.6648e-02, PNorm = 84.0644, GNorm = 0.6509, lr_0 = 1.7916e-04
Loss = 8.9836e-02, PNorm = 84.0685, GNorm = 0.8138, lr_0 = 1.7904e-04
Loss = 8.7318e-02, PNorm = 84.0708, GNorm = 0.7473, lr_0 = 1.7892e-04
Loss = 9.0123e-02, PNorm = 84.0720, GNorm = 0.5234, lr_0 = 1.7879e-04
Loss = 9.7199e-02, PNorm = 84.0755, GNorm = 0.6678, lr_0 = 1.7867e-04
Loss = 7.5706e-02, PNorm = 84.0790, GNorm = 0.5809, lr_0 = 1.7855e-04
Loss = 8.0695e-02, PNorm = 84.0811, GNorm = 0.5074, lr_0 = 1.7843e-04
Loss = 9.3667e-02, PNorm = 84.0827, GNorm = 0.6685, lr_0 = 1.7830e-04
Loss = 1.0017e-01, PNorm = 84.0851, GNorm = 0.7949, lr_0 = 1.7818e-04
Loss = 8.9840e-02, PNorm = 84.0881, GNorm = 0.7278, lr_0 = 1.7806e-04
Loss = 8.4762e-02, PNorm = 84.0933, GNorm = 0.6146, lr_0 = 1.7794e-04
Loss = 9.7621e-02, PNorm = 84.0953, GNorm = 0.7679, lr_0 = 1.7782e-04
Validation mae = 0.227057
Epoch 23
Loss = 9.0409e-02, PNorm = 84.0985, GNorm = 0.8829, lr_0 = 1.7769e-04
Loss = 8.4670e-02, PNorm = 84.1031, GNorm = 0.7512, lr_0 = 1.7757e-04
Loss = 8.0336e-02, PNorm = 84.1073, GNorm = 0.6805, lr_0 = 1.7745e-04
Loss = 8.0591e-02, PNorm = 84.1089, GNorm = 0.8374, lr_0 = 1.7733e-04
Loss = 8.5481e-02, PNorm = 84.1124, GNorm = 0.6788, lr_0 = 1.7721e-04
Loss = 8.5483e-02, PNorm = 84.1152, GNorm = 0.5740, lr_0 = 1.7709e-04
Loss = 8.3423e-02, PNorm = 84.1201, GNorm = 0.7044, lr_0 = 1.7696e-04
Loss = 8.8609e-02, PNorm = 84.1264, GNorm = 0.7136, lr_0 = 1.7684e-04
Loss = 7.0861e-02, PNorm = 84.1316, GNorm = 0.5041, lr_0 = 1.7672e-04
Loss = 7.7549e-02, PNorm = 84.1349, GNorm = 0.6291, lr_0 = 1.7660e-04
Loss = 8.7027e-02, PNorm = 84.1381, GNorm = 0.6303, lr_0 = 1.7648e-04
Loss = 7.8547e-02, PNorm = 84.1399, GNorm = 0.6189, lr_0 = 1.7636e-04
Loss = 8.4939e-02, PNorm = 84.1423, GNorm = 0.6213, lr_0 = 1.7624e-04
Loss = 8.4555e-02, PNorm = 84.1467, GNorm = 0.5736, lr_0 = 1.7612e-04
Loss = 7.6772e-02, PNorm = 84.1489, GNorm = 0.9389, lr_0 = 1.7600e-04
Loss = 8.1461e-02, PNorm = 84.1500, GNorm = 0.6988, lr_0 = 1.7588e-04
Loss = 8.7291e-02, PNorm = 84.1535, GNorm = 0.7184, lr_0 = 1.7576e-04
Loss = 7.8698e-02, PNorm = 84.1549, GNorm = 0.7497, lr_0 = 1.7564e-04
Loss = 7.9176e-02, PNorm = 84.1571, GNorm = 0.7067, lr_0 = 1.7552e-04
Loss = 8.8315e-02, PNorm = 84.1603, GNorm = 0.6764, lr_0 = 1.7540e-04
Loss = 1.0042e-01, PNorm = 84.1640, GNorm = 0.5572, lr_0 = 1.7528e-04
Loss = 7.6396e-02, PNorm = 84.1681, GNorm = 0.7571, lr_0 = 1.7516e-04
Loss = 8.6873e-02, PNorm = 84.1703, GNorm = 0.5199, lr_0 = 1.7504e-04
Loss = 8.5127e-02, PNorm = 84.1732, GNorm = 0.8110, lr_0 = 1.7492e-04
Loss = 8.2663e-02, PNorm = 84.1750, GNorm = 0.5101, lr_0 = 1.7480e-04
Loss = 7.7513e-02, PNorm = 84.1774, GNorm = 0.5014, lr_0 = 1.7468e-04
Loss = 8.1200e-02, PNorm = 84.1775, GNorm = 0.5855, lr_0 = 1.7456e-04
Loss = 9.2182e-02, PNorm = 84.1800, GNorm = 0.5775, lr_0 = 1.7444e-04
Loss = 8.4194e-02, PNorm = 84.1837, GNorm = 0.7534, lr_0 = 1.7432e-04
Loss = 8.3495e-02, PNorm = 84.1862, GNorm = 0.6816, lr_0 = 1.7420e-04
Loss = 9.7009e-02, PNorm = 84.1900, GNorm = 0.8082, lr_0 = 1.7408e-04
Loss = 8.3855e-02, PNorm = 84.1921, GNorm = 0.4979, lr_0 = 1.7396e-04
Loss = 7.9423e-02, PNorm = 84.1948, GNorm = 0.7685, lr_0 = 1.7384e-04
Loss = 1.0523e-01, PNorm = 84.1955, GNorm = 0.5867, lr_0 = 1.7372e-04
Loss = 8.3440e-02, PNorm = 84.1991, GNorm = 0.5425, lr_0 = 1.7360e-04
Loss = 8.7377e-02, PNorm = 84.2044, GNorm = 0.5981, lr_0 = 1.7348e-04
Loss = 7.3106e-02, PNorm = 84.2087, GNorm = 0.5758, lr_0 = 1.7336e-04
Loss = 8.7727e-02, PNorm = 84.2130, GNorm = 0.6682, lr_0 = 1.7325e-04
Loss = 7.9516e-02, PNorm = 84.2166, GNorm = 0.7603, lr_0 = 1.7313e-04
Loss = 8.1555e-02, PNorm = 84.2179, GNorm = 0.6176, lr_0 = 1.7301e-04
Loss = 8.1527e-02, PNorm = 84.2201, GNorm = 0.6430, lr_0 = 1.7289e-04
Loss = 9.0728e-02, PNorm = 84.2256, GNorm = 0.6847, lr_0 = 1.7277e-04
Loss = 8.7155e-02, PNorm = 84.2272, GNorm = 1.1264, lr_0 = 1.7265e-04
Loss = 8.6843e-02, PNorm = 84.2289, GNorm = 0.5904, lr_0 = 1.7253e-04
Loss = 9.7824e-02, PNorm = 84.2327, GNorm = 0.5154, lr_0 = 1.7242e-04
Loss = 8.6283e-02, PNorm = 84.2346, GNorm = 0.5896, lr_0 = 1.7230e-04
Loss = 8.5604e-02, PNorm = 84.2360, GNorm = 0.6586, lr_0 = 1.7218e-04
Loss = 8.8880e-02, PNorm = 84.2369, GNorm = 0.6118, lr_0 = 1.7206e-04
Loss = 8.6720e-02, PNorm = 84.2387, GNorm = 0.6329, lr_0 = 1.7194e-04
Loss = 7.9429e-02, PNorm = 84.2398, GNorm = 0.8612, lr_0 = 1.7183e-04
Loss = 9.5394e-02, PNorm = 84.2418, GNorm = 0.6515, lr_0 = 1.7171e-04
Loss = 7.9790e-02, PNorm = 84.2432, GNorm = 0.4867, lr_0 = 1.7159e-04
Loss = 7.7200e-02, PNorm = 84.2467, GNorm = 0.4804, lr_0 = 1.7147e-04
Loss = 8.9618e-02, PNorm = 84.2505, GNorm = 0.6511, lr_0 = 1.7136e-04
Loss = 7.9439e-02, PNorm = 84.2537, GNorm = 0.4947, lr_0 = 1.7124e-04
Loss = 8.0505e-02, PNorm = 84.2565, GNorm = 0.7766, lr_0 = 1.7112e-04
Loss = 9.2752e-02, PNorm = 84.2586, GNorm = 0.9246, lr_0 = 1.7100e-04
Loss = 8.3708e-02, PNorm = 84.2628, GNorm = 0.6050, lr_0 = 1.7089e-04
Loss = 8.8586e-02, PNorm = 84.2671, GNorm = 0.7221, lr_0 = 1.7077e-04
Loss = 8.0995e-02, PNorm = 84.2707, GNorm = 0.7294, lr_0 = 1.7065e-04
Loss = 8.6016e-02, PNorm = 84.2729, GNorm = 0.7481, lr_0 = 1.7054e-04
Loss = 7.5327e-02, PNorm = 84.2762, GNorm = 0.5737, lr_0 = 1.7042e-04
Loss = 8.7250e-02, PNorm = 84.2779, GNorm = 0.7229, lr_0 = 1.7030e-04
Loss = 7.8727e-02, PNorm = 84.2779, GNorm = 0.6402, lr_0 = 1.7019e-04
Loss = 8.1466e-02, PNorm = 84.2776, GNorm = 0.5018, lr_0 = 1.7007e-04
Loss = 8.6741e-02, PNorm = 84.2807, GNorm = 0.6113, lr_0 = 1.6995e-04
Loss = 7.7553e-02, PNorm = 84.2854, GNorm = 0.5503, lr_0 = 1.6984e-04
Loss = 9.4518e-02, PNorm = 84.2897, GNorm = 0.8908, lr_0 = 1.6972e-04
Loss = 7.0668e-02, PNorm = 84.2938, GNorm = 0.7557, lr_0 = 1.6960e-04
Loss = 9.6824e-02, PNorm = 84.2984, GNorm = 0.7107, lr_0 = 1.6949e-04
Loss = 7.6996e-02, PNorm = 84.3025, GNorm = 0.8591, lr_0 = 1.6937e-04
Loss = 7.6447e-02, PNorm = 84.3059, GNorm = 0.4799, lr_0 = 1.6926e-04
Loss = 8.6052e-02, PNorm = 84.3071, GNorm = 0.6490, lr_0 = 1.6914e-04
Loss = 8.3959e-02, PNorm = 84.3105, GNorm = 0.7000, lr_0 = 1.6902e-04
Loss = 7.4472e-02, PNorm = 84.3136, GNorm = 0.6332, lr_0 = 1.6891e-04
Loss = 8.0794e-02, PNorm = 84.3159, GNorm = 0.7424, lr_0 = 1.6879e-04
Loss = 7.8665e-02, PNorm = 84.3144, GNorm = 0.5116, lr_0 = 1.6868e-04
Loss = 8.3036e-02, PNorm = 84.3151, GNorm = 0.5226, lr_0 = 1.6856e-04
Loss = 9.2034e-02, PNorm = 84.3177, GNorm = 0.6448, lr_0 = 1.6845e-04
Loss = 7.7420e-02, PNorm = 84.3190, GNorm = 0.8778, lr_0 = 1.6833e-04
Loss = 8.9280e-02, PNorm = 84.3210, GNorm = 0.9410, lr_0 = 1.6821e-04
Loss = 7.7666e-02, PNorm = 84.3244, GNorm = 0.6317, lr_0 = 1.6810e-04
Loss = 7.8559e-02, PNorm = 84.3284, GNorm = 0.6011, lr_0 = 1.6798e-04
Loss = 7.2232e-02, PNorm = 84.3292, GNorm = 0.7051, lr_0 = 1.6787e-04
Loss = 8.5280e-02, PNorm = 84.3292, GNorm = 0.6041, lr_0 = 1.6775e-04
Loss = 1.0093e-01, PNorm = 84.3322, GNorm = 0.7535, lr_0 = 1.6764e-04
Loss = 8.1747e-02, PNorm = 84.3352, GNorm = 0.8956, lr_0 = 1.6752e-04
Loss = 9.4641e-02, PNorm = 84.3364, GNorm = 0.5572, lr_0 = 1.6741e-04
Loss = 9.2068e-02, PNorm = 84.3401, GNorm = 0.6832, lr_0 = 1.6729e-04
Loss = 7.9809e-02, PNorm = 84.3462, GNorm = 0.5998, lr_0 = 1.6718e-04
Loss = 8.9335e-02, PNorm = 84.3469, GNorm = 0.5838, lr_0 = 1.6707e-04
Loss = 8.0524e-02, PNorm = 84.3501, GNorm = 0.5783, lr_0 = 1.6695e-04
Loss = 9.6852e-02, PNorm = 84.3528, GNorm = 0.8233, lr_0 = 1.6684e-04
Loss = 7.3841e-02, PNorm = 84.3548, GNorm = 0.5634, lr_0 = 1.6672e-04
Loss = 7.5344e-02, PNorm = 84.3556, GNorm = 0.4913, lr_0 = 1.6661e-04
Loss = 8.1675e-02, PNorm = 84.3599, GNorm = 0.5043, lr_0 = 1.6649e-04
Loss = 8.4896e-02, PNorm = 84.3626, GNorm = 0.7485, lr_0 = 1.6638e-04
Loss = 7.6774e-02, PNorm = 84.3632, GNorm = 0.4792, lr_0 = 1.6627e-04
Loss = 8.0395e-02, PNorm = 84.3646, GNorm = 0.5756, lr_0 = 1.6615e-04
Loss = 8.9129e-02, PNorm = 84.3704, GNorm = 0.6757, lr_0 = 1.6604e-04
Loss = 7.7089e-02, PNorm = 84.3745, GNorm = 0.7872, lr_0 = 1.6592e-04
Loss = 8.9437e-02, PNorm = 84.3774, GNorm = 0.5887, lr_0 = 1.6581e-04
Loss = 7.4947e-02, PNorm = 84.3813, GNorm = 0.5813, lr_0 = 1.6570e-04
Loss = 8.7395e-02, PNorm = 84.3809, GNorm = 0.5042, lr_0 = 1.6558e-04
Loss = 8.6716e-02, PNorm = 84.3829, GNorm = 0.6122, lr_0 = 1.6547e-04
Loss = 8.7470e-02, PNorm = 84.3856, GNorm = 0.6873, lr_0 = 1.6536e-04
Loss = 8.2421e-02, PNorm = 84.3888, GNorm = 0.6652, lr_0 = 1.6524e-04
Loss = 8.4921e-02, PNorm = 84.3887, GNorm = 0.6026, lr_0 = 1.6513e-04
Loss = 8.9298e-02, PNorm = 84.3906, GNorm = 0.7279, lr_0 = 1.6502e-04
Loss = 9.2318e-02, PNorm = 84.3916, GNorm = 0.6293, lr_0 = 1.6490e-04
Loss = 9.1973e-02, PNorm = 84.3941, GNorm = 0.7265, lr_0 = 1.6479e-04
Loss = 8.0375e-02, PNorm = 84.3967, GNorm = 0.5718, lr_0 = 1.6468e-04
Loss = 7.8446e-02, PNorm = 84.3979, GNorm = 0.5696, lr_0 = 1.6457e-04
Loss = 9.1476e-02, PNorm = 84.3998, GNorm = 0.7145, lr_0 = 1.6445e-04
Loss = 7.6614e-02, PNorm = 84.4041, GNorm = 0.7972, lr_0 = 1.6434e-04
Loss = 8.9456e-02, PNorm = 84.4085, GNorm = 0.7231, lr_0 = 1.6423e-04
Loss = 9.6127e-02, PNorm = 84.4120, GNorm = 0.5943, lr_0 = 1.6412e-04
Loss = 8.6826e-02, PNorm = 84.4145, GNorm = 0.8129, lr_0 = 1.6400e-04
Loss = 9.3200e-02, PNorm = 84.4153, GNorm = 0.7049, lr_0 = 1.6389e-04
Loss = 9.2015e-02, PNorm = 84.4178, GNorm = 0.7233, lr_0 = 1.6378e-04
Validation mae = 0.227818
Epoch 24
Loss = 7.4509e-02, PNorm = 84.4188, GNorm = 0.7884, lr_0 = 1.6367e-04
Loss = 8.1026e-02, PNorm = 84.4218, GNorm = 0.6460, lr_0 = 1.6355e-04
Loss = 8.1711e-02, PNorm = 84.4257, GNorm = 0.6212, lr_0 = 1.6344e-04
Loss = 8.8022e-02, PNorm = 84.4261, GNorm = 0.6216, lr_0 = 1.6333e-04
Loss = 8.4283e-02, PNorm = 84.4275, GNorm = 0.6619, lr_0 = 1.6322e-04
Loss = 7.2611e-02, PNorm = 84.4294, GNorm = 0.8432, lr_0 = 1.6311e-04
Loss = 7.5303e-02, PNorm = 84.4307, GNorm = 0.7371, lr_0 = 1.6299e-04
Loss = 7.9966e-02, PNorm = 84.4344, GNorm = 0.6982, lr_0 = 1.6288e-04
Loss = 7.0143e-02, PNorm = 84.4372, GNorm = 0.6339, lr_0 = 1.6277e-04
Loss = 7.5247e-02, PNorm = 84.4398, GNorm = 0.8421, lr_0 = 1.6266e-04
Loss = 8.8213e-02, PNorm = 84.4437, GNorm = 0.6418, lr_0 = 1.6255e-04
Loss = 7.7386e-02, PNorm = 84.4463, GNorm = 0.6701, lr_0 = 1.6244e-04
Loss = 8.3974e-02, PNorm = 84.4484, GNorm = 0.4347, lr_0 = 1.6233e-04
Loss = 8.3228e-02, PNorm = 84.4511, GNorm = 0.7186, lr_0 = 1.6221e-04
Loss = 8.0981e-02, PNorm = 84.4535, GNorm = 0.5208, lr_0 = 1.6210e-04
Loss = 7.7026e-02, PNorm = 84.4571, GNorm = 0.5672, lr_0 = 1.6199e-04
Loss = 8.9476e-02, PNorm = 84.4589, GNorm = 0.6477, lr_0 = 1.6188e-04
Loss = 8.4780e-02, PNorm = 84.4591, GNorm = 0.7498, lr_0 = 1.6177e-04
Loss = 7.9756e-02, PNorm = 84.4631, GNorm = 0.5393, lr_0 = 1.6166e-04
Loss = 8.5371e-02, PNorm = 84.4669, GNorm = 0.9422, lr_0 = 1.6155e-04
Loss = 7.0617e-02, PNorm = 84.4701, GNorm = 0.5141, lr_0 = 1.6144e-04
Loss = 8.0000e-02, PNorm = 84.4721, GNorm = 0.6604, lr_0 = 1.6133e-04
Loss = 9.6750e-02, PNorm = 84.4739, GNorm = 0.6536, lr_0 = 1.6122e-04
Loss = 8.4094e-02, PNorm = 84.4774, GNorm = 0.5268, lr_0 = 1.6111e-04
Loss = 8.7785e-02, PNorm = 84.4800, GNorm = 0.9232, lr_0 = 1.6100e-04
Loss = 8.7011e-02, PNorm = 84.4833, GNorm = 0.6478, lr_0 = 1.6089e-04
Loss = 8.6045e-02, PNorm = 84.4849, GNorm = 0.4890, lr_0 = 1.6078e-04
Loss = 7.2510e-02, PNorm = 84.4867, GNorm = 0.7523, lr_0 = 1.6067e-04
Loss = 8.1853e-02, PNorm = 84.4882, GNorm = 0.6447, lr_0 = 1.6056e-04
Loss = 8.8624e-02, PNorm = 84.4916, GNorm = 0.5581, lr_0 = 1.6045e-04
Loss = 9.1540e-02, PNorm = 84.4939, GNorm = 0.5110, lr_0 = 1.6034e-04
Loss = 7.6019e-02, PNorm = 84.4946, GNorm = 0.6270, lr_0 = 1.6023e-04
Loss = 7.1157e-02, PNorm = 84.4973, GNorm = 0.6272, lr_0 = 1.6012e-04
Loss = 7.9123e-02, PNorm = 84.4982, GNorm = 0.7197, lr_0 = 1.6001e-04
Loss = 8.0652e-02, PNorm = 84.5019, GNorm = 0.5524, lr_0 = 1.5990e-04
Loss = 8.4890e-02, PNorm = 84.5033, GNorm = 0.5614, lr_0 = 1.5979e-04
Loss = 8.3368e-02, PNorm = 84.5064, GNorm = 0.4470, lr_0 = 1.5968e-04
Loss = 8.1110e-02, PNorm = 84.5095, GNorm = 0.6019, lr_0 = 1.5957e-04
Loss = 8.0445e-02, PNorm = 84.5112, GNorm = 0.4949, lr_0 = 1.5946e-04
Loss = 7.1511e-02, PNorm = 84.5142, GNorm = 0.5743, lr_0 = 1.5935e-04
Loss = 7.9457e-02, PNorm = 84.5162, GNorm = 0.9400, lr_0 = 1.5924e-04
Loss = 8.3401e-02, PNorm = 84.5185, GNorm = 0.8125, lr_0 = 1.5913e-04
Loss = 8.0442e-02, PNorm = 84.5197, GNorm = 0.5901, lr_0 = 1.5902e-04
Loss = 8.7135e-02, PNorm = 84.5195, GNorm = 0.5522, lr_0 = 1.5891e-04
Loss = 8.6820e-02, PNorm = 84.5216, GNorm = 0.5666, lr_0 = 1.5880e-04
Loss = 8.4995e-02, PNorm = 84.5248, GNorm = 0.5776, lr_0 = 1.5870e-04
Loss = 7.9650e-02, PNorm = 84.5286, GNorm = 0.5585, lr_0 = 1.5859e-04
Loss = 1.0267e-01, PNorm = 84.5310, GNorm = 0.7087, lr_0 = 1.5848e-04
Loss = 7.8682e-02, PNorm = 84.5331, GNorm = 0.6125, lr_0 = 1.5837e-04
Loss = 7.1253e-02, PNorm = 84.5367, GNorm = 0.4410, lr_0 = 1.5826e-04
Loss = 7.5278e-02, PNorm = 84.5411, GNorm = 0.7068, lr_0 = 1.5815e-04
Loss = 8.7600e-02, PNorm = 84.5454, GNorm = 0.5385, lr_0 = 1.5804e-04
Loss = 7.2368e-02, PNorm = 84.5488, GNorm = 0.6459, lr_0 = 1.5794e-04
Loss = 8.0338e-02, PNorm = 84.5519, GNorm = 0.4583, lr_0 = 1.5783e-04
Loss = 9.1673e-02, PNorm = 84.5555, GNorm = 0.5688, lr_0 = 1.5772e-04
Loss = 8.9033e-02, PNorm = 84.5600, GNorm = 0.6416, lr_0 = 1.5761e-04
Loss = 8.1042e-02, PNorm = 84.5633, GNorm = 0.8246, lr_0 = 1.5750e-04
Loss = 7.8964e-02, PNorm = 84.5649, GNorm = 0.6635, lr_0 = 1.5740e-04
Loss = 7.6936e-02, PNorm = 84.5682, GNorm = 0.6240, lr_0 = 1.5729e-04
Loss = 9.0797e-02, PNorm = 84.5722, GNorm = 0.5214, lr_0 = 1.5718e-04
Loss = 8.7429e-02, PNorm = 84.5753, GNorm = 0.6460, lr_0 = 1.5707e-04
Loss = 7.9749e-02, PNorm = 84.5787, GNorm = 0.5567, lr_0 = 1.5697e-04
Loss = 7.3039e-02, PNorm = 84.5817, GNorm = 0.6721, lr_0 = 1.5686e-04
Loss = 8.7187e-02, PNorm = 84.5831, GNorm = 0.8862, lr_0 = 1.5675e-04
Loss = 8.3283e-02, PNorm = 84.5860, GNorm = 0.8045, lr_0 = 1.5664e-04
Loss = 9.4783e-02, PNorm = 84.5894, GNorm = 0.8468, lr_0 = 1.5654e-04
Loss = 8.3712e-02, PNorm = 84.5901, GNorm = 0.5852, lr_0 = 1.5643e-04
Loss = 8.5893e-02, PNorm = 84.5895, GNorm = 0.4918, lr_0 = 1.5632e-04
Loss = 1.0099e-01, PNorm = 84.5915, GNorm = 0.6004, lr_0 = 1.5621e-04
Loss = 7.1262e-02, PNorm = 84.5942, GNorm = 0.7631, lr_0 = 1.5611e-04
Loss = 8.0872e-02, PNorm = 84.5950, GNorm = 0.5570, lr_0 = 1.5600e-04
Loss = 8.7672e-02, PNorm = 84.5970, GNorm = 0.6445, lr_0 = 1.5589e-04
Loss = 8.4344e-02, PNorm = 84.5984, GNorm = 0.6105, lr_0 = 1.5579e-04
Loss = 7.9461e-02, PNorm = 84.6005, GNorm = 0.8469, lr_0 = 1.5568e-04
Loss = 7.8029e-02, PNorm = 84.6012, GNorm = 0.6011, lr_0 = 1.5557e-04
Loss = 8.2151e-02, PNorm = 84.6030, GNorm = 0.5875, lr_0 = 1.5547e-04
Loss = 7.5756e-02, PNorm = 84.6060, GNorm = 0.5676, lr_0 = 1.5536e-04
Loss = 7.2304e-02, PNorm = 84.6080, GNorm = 0.8046, lr_0 = 1.5525e-04
Loss = 8.0548e-02, PNorm = 84.6082, GNorm = 0.5932, lr_0 = 1.5515e-04
Loss = 8.9528e-02, PNorm = 84.6104, GNorm = 0.8239, lr_0 = 1.5504e-04
Loss = 8.0490e-02, PNorm = 84.6132, GNorm = 0.6036, lr_0 = 1.5493e-04
Loss = 1.0465e-01, PNorm = 84.6172, GNorm = 0.6682, lr_0 = 1.5483e-04
Loss = 7.8908e-02, PNorm = 84.6192, GNorm = 0.5634, lr_0 = 1.5472e-04
Loss = 7.8733e-02, PNorm = 84.6208, GNorm = 0.6514, lr_0 = 1.5462e-04
Loss = 7.5233e-02, PNorm = 84.6221, GNorm = 0.7972, lr_0 = 1.5451e-04
Loss = 8.0900e-02, PNorm = 84.6237, GNorm = 0.7682, lr_0 = 1.5440e-04
Loss = 7.5712e-02, PNorm = 84.6241, GNorm = 0.7286, lr_0 = 1.5430e-04
Loss = 7.9804e-02, PNorm = 84.6275, GNorm = 0.5195, lr_0 = 1.5419e-04
Loss = 9.3471e-02, PNorm = 84.6308, GNorm = 0.6855, lr_0 = 1.5409e-04
Loss = 9.8295e-02, PNorm = 84.6301, GNorm = 0.7268, lr_0 = 1.5398e-04
Loss = 7.7501e-02, PNorm = 84.6331, GNorm = 0.8730, lr_0 = 1.5388e-04
Loss = 8.6870e-02, PNorm = 84.6359, GNorm = 0.7050, lr_0 = 1.5377e-04
Loss = 8.8119e-02, PNorm = 84.6375, GNorm = 0.6734, lr_0 = 1.5367e-04
Loss = 8.6227e-02, PNorm = 84.6399, GNorm = 0.5986, lr_0 = 1.5356e-04
Loss = 8.1939e-02, PNorm = 84.6430, GNorm = 0.5767, lr_0 = 1.5346e-04
Loss = 7.5409e-02, PNorm = 84.6466, GNorm = 0.5494, lr_0 = 1.5335e-04
Loss = 7.5695e-02, PNorm = 84.6500, GNorm = 0.7802, lr_0 = 1.5325e-04
Loss = 7.7289e-02, PNorm = 84.6503, GNorm = 0.9260, lr_0 = 1.5314e-04
Loss = 8.3669e-02, PNorm = 84.6525, GNorm = 0.5704, lr_0 = 1.5304e-04
Loss = 9.6398e-02, PNorm = 84.6538, GNorm = 0.5498, lr_0 = 1.5293e-04
Loss = 9.3426e-02, PNorm = 84.6559, GNorm = 0.5012, lr_0 = 1.5283e-04
Loss = 7.5689e-02, PNorm = 84.6577, GNorm = 0.5138, lr_0 = 1.5272e-04
Loss = 7.7568e-02, PNorm = 84.6602, GNorm = 0.4756, lr_0 = 1.5262e-04
Loss = 8.2337e-02, PNorm = 84.6630, GNorm = 0.4705, lr_0 = 1.5251e-04
Loss = 7.8130e-02, PNorm = 84.6647, GNorm = 0.4445, lr_0 = 1.5241e-04
Loss = 8.9008e-02, PNorm = 84.6677, GNorm = 0.5540, lr_0 = 1.5230e-04
Loss = 8.0641e-02, PNorm = 84.6704, GNorm = 0.4763, lr_0 = 1.5220e-04
Loss = 8.5805e-02, PNorm = 84.6721, GNorm = 0.7839, lr_0 = 1.5209e-04
Loss = 8.4096e-02, PNorm = 84.6762, GNorm = 0.6190, lr_0 = 1.5199e-04
Loss = 8.3800e-02, PNorm = 84.6795, GNorm = 0.7153, lr_0 = 1.5189e-04
Loss = 8.6197e-02, PNorm = 84.6806, GNorm = 0.7666, lr_0 = 1.5178e-04
Loss = 7.6230e-02, PNorm = 84.6832, GNorm = 0.4733, lr_0 = 1.5168e-04
Loss = 8.4604e-02, PNorm = 84.6843, GNorm = 0.5186, lr_0 = 1.5157e-04
Loss = 8.8753e-02, PNorm = 84.6849, GNorm = 0.6389, lr_0 = 1.5147e-04
Loss = 8.4968e-02, PNorm = 84.6874, GNorm = 0.6257, lr_0 = 1.5137e-04
Loss = 8.1813e-02, PNorm = 84.6907, GNorm = 0.5161, lr_0 = 1.5126e-04
Loss = 8.6083e-02, PNorm = 84.6951, GNorm = 0.8230, lr_0 = 1.5116e-04
Loss = 8.6399e-02, PNorm = 84.6967, GNorm = 0.6165, lr_0 = 1.5106e-04
Loss = 8.6821e-02, PNorm = 84.6969, GNorm = 0.8096, lr_0 = 1.5095e-04
Loss = 9.4966e-02, PNorm = 84.6966, GNorm = 0.5348, lr_0 = 1.5085e-04
Validation mae = 0.226847
Epoch 25
Loss = 7.9414e-02, PNorm = 84.6974, GNorm = 0.5697, lr_0 = 1.5075e-04
Loss = 7.7010e-02, PNorm = 84.6997, GNorm = 0.8835, lr_0 = 1.5064e-04
Loss = 7.7213e-02, PNorm = 84.7030, GNorm = 0.6590, lr_0 = 1.5054e-04
Loss = 7.2298e-02, PNorm = 84.7052, GNorm = 0.6207, lr_0 = 1.5044e-04
Loss = 7.0478e-02, PNorm = 84.7067, GNorm = 0.6601, lr_0 = 1.5033e-04
Loss = 8.5437e-02, PNorm = 84.7097, GNorm = 0.5236, lr_0 = 1.5023e-04
Loss = 7.8092e-02, PNorm = 84.7125, GNorm = 0.6539, lr_0 = 1.5013e-04
Loss = 7.3851e-02, PNorm = 84.7140, GNorm = 0.6379, lr_0 = 1.5002e-04
Loss = 7.6581e-02, PNorm = 84.7167, GNorm = 1.0106, lr_0 = 1.4992e-04
Loss = 7.5660e-02, PNorm = 84.7178, GNorm = 0.6057, lr_0 = 1.4982e-04
Loss = 7.9723e-02, PNorm = 84.7203, GNorm = 0.4753, lr_0 = 1.4972e-04
Loss = 7.8061e-02, PNorm = 84.7224, GNorm = 0.7347, lr_0 = 1.4961e-04
Loss = 8.2633e-02, PNorm = 84.7241, GNorm = 0.6434, lr_0 = 1.4951e-04
Loss = 8.1304e-02, PNorm = 84.7271, GNorm = 0.8704, lr_0 = 1.4941e-04
Loss = 8.8648e-02, PNorm = 84.7315, GNorm = 0.7835, lr_0 = 1.4931e-04
Loss = 7.9001e-02, PNorm = 84.7348, GNorm = 0.5293, lr_0 = 1.4920e-04
Loss = 8.5122e-02, PNorm = 84.7372, GNorm = 0.5454, lr_0 = 1.4910e-04
Loss = 7.0550e-02, PNorm = 84.7400, GNorm = 0.7283, lr_0 = 1.4900e-04
Loss = 7.6987e-02, PNorm = 84.7402, GNorm = 0.5462, lr_0 = 1.4890e-04
Loss = 8.2877e-02, PNorm = 84.7407, GNorm = 0.6242, lr_0 = 1.4880e-04
Loss = 7.7934e-02, PNorm = 84.7419, GNorm = 0.7664, lr_0 = 1.4869e-04
Loss = 7.6189e-02, PNorm = 84.7421, GNorm = 0.4489, lr_0 = 1.4859e-04
Loss = 7.4202e-02, PNorm = 84.7443, GNorm = 0.5163, lr_0 = 1.4849e-04
Loss = 8.4841e-02, PNorm = 84.7474, GNorm = 0.6221, lr_0 = 1.4839e-04
Loss = 7.1029e-02, PNorm = 84.7507, GNorm = 0.7257, lr_0 = 1.4829e-04
Loss = 7.6775e-02, PNorm = 84.7510, GNorm = 0.6175, lr_0 = 1.4818e-04
Loss = 8.4869e-02, PNorm = 84.7524, GNorm = 0.7001, lr_0 = 1.4808e-04
Loss = 9.6324e-02, PNorm = 84.7527, GNorm = 0.7055, lr_0 = 1.4798e-04
Loss = 7.5861e-02, PNorm = 84.7529, GNorm = 0.6603, lr_0 = 1.4788e-04
Loss = 8.2170e-02, PNorm = 84.7561, GNorm = 0.5722, lr_0 = 1.4778e-04
Loss = 8.6290e-02, PNorm = 84.7574, GNorm = 0.8363, lr_0 = 1.4768e-04
Loss = 8.2664e-02, PNorm = 84.7597, GNorm = 0.6017, lr_0 = 1.4758e-04
Loss = 8.2613e-02, PNorm = 84.7647, GNorm = 0.7640, lr_0 = 1.4748e-04
Loss = 7.6607e-02, PNorm = 84.7683, GNorm = 0.5420, lr_0 = 1.4737e-04
Loss = 8.0590e-02, PNorm = 84.7708, GNorm = 0.6034, lr_0 = 1.4727e-04
Loss = 8.0386e-02, PNorm = 84.7730, GNorm = 0.5910, lr_0 = 1.4717e-04
Loss = 6.7011e-02, PNorm = 84.7758, GNorm = 0.5806, lr_0 = 1.4707e-04
Loss = 6.7032e-02, PNorm = 84.7777, GNorm = 0.5209, lr_0 = 1.4697e-04
Loss = 7.8010e-02, PNorm = 84.7797, GNorm = 0.7311, lr_0 = 1.4687e-04
Loss = 7.8229e-02, PNorm = 84.7801, GNorm = 0.5939, lr_0 = 1.4677e-04
Loss = 7.6349e-02, PNorm = 84.7817, GNorm = 0.6496, lr_0 = 1.4667e-04
Loss = 7.5362e-02, PNorm = 84.7824, GNorm = 0.5733, lr_0 = 1.4657e-04
Loss = 8.4323e-02, PNorm = 84.7843, GNorm = 0.8610, lr_0 = 1.4647e-04
Loss = 8.3765e-02, PNorm = 84.7853, GNorm = 0.5373, lr_0 = 1.4637e-04
Loss = 8.1723e-02, PNorm = 84.7873, GNorm = 0.4585, lr_0 = 1.4627e-04
Loss = 9.1905e-02, PNorm = 84.7903, GNorm = 1.0620, lr_0 = 1.4617e-04
Loss = 7.6422e-02, PNorm = 84.7917, GNorm = 0.8675, lr_0 = 1.4607e-04
Loss = 7.7307e-02, PNorm = 84.7946, GNorm = 0.5773, lr_0 = 1.4597e-04
Loss = 7.5823e-02, PNorm = 84.7954, GNorm = 0.6274, lr_0 = 1.4587e-04
Loss = 7.2571e-02, PNorm = 84.7961, GNorm = 0.4714, lr_0 = 1.4577e-04
Loss = 7.8508e-02, PNorm = 84.7992, GNorm = 0.5595, lr_0 = 1.4567e-04
Loss = 8.4204e-02, PNorm = 84.8022, GNorm = 0.6295, lr_0 = 1.4557e-04
Loss = 8.4231e-02, PNorm = 84.8035, GNorm = 0.6942, lr_0 = 1.4547e-04
Loss = 9.2516e-02, PNorm = 84.8064, GNorm = 1.1328, lr_0 = 1.4537e-04
Loss = 8.1958e-02, PNorm = 84.8089, GNorm = 0.5687, lr_0 = 1.4527e-04
Loss = 7.9935e-02, PNorm = 84.8104, GNorm = 0.7054, lr_0 = 1.4517e-04
Loss = 8.3019e-02, PNorm = 84.8132, GNorm = 0.5566, lr_0 = 1.4507e-04
Loss = 8.0681e-02, PNorm = 84.8178, GNorm = 0.7881, lr_0 = 1.4497e-04
Loss = 7.8146e-02, PNorm = 84.8205, GNorm = 0.5058, lr_0 = 1.4487e-04
Loss = 7.8600e-02, PNorm = 84.8227, GNorm = 0.5547, lr_0 = 1.4477e-04
Loss = 8.4993e-02, PNorm = 84.8260, GNorm = 0.6252, lr_0 = 1.4467e-04
Loss = 1.0029e-01, PNorm = 84.8276, GNorm = 0.5697, lr_0 = 1.4457e-04
Loss = 7.5486e-02, PNorm = 84.8303, GNorm = 0.6325, lr_0 = 1.4447e-04
Loss = 7.7864e-02, PNorm = 84.8317, GNorm = 0.6740, lr_0 = 1.4438e-04
Loss = 7.2752e-02, PNorm = 84.8338, GNorm = 0.5994, lr_0 = 1.4428e-04
Loss = 9.7199e-02, PNorm = 84.8364, GNorm = 1.0486, lr_0 = 1.4418e-04
Loss = 7.3465e-02, PNorm = 84.8395, GNorm = 0.6380, lr_0 = 1.4408e-04
Loss = 8.0808e-02, PNorm = 84.8428, GNorm = 0.5949, lr_0 = 1.4398e-04
Loss = 7.5573e-02, PNorm = 84.8432, GNorm = 0.6389, lr_0 = 1.4388e-04
Loss = 7.9822e-02, PNorm = 84.8453, GNorm = 0.6071, lr_0 = 1.4378e-04
Loss = 8.2237e-02, PNorm = 84.8481, GNorm = 0.8495, lr_0 = 1.4368e-04
Loss = 7.6077e-02, PNorm = 84.8507, GNorm = 0.6586, lr_0 = 1.4359e-04
Loss = 8.1574e-02, PNorm = 84.8532, GNorm = 0.7070, lr_0 = 1.4349e-04
Loss = 8.9080e-02, PNorm = 84.8565, GNorm = 0.6612, lr_0 = 1.4339e-04
Loss = 8.3092e-02, PNorm = 84.8605, GNorm = 0.5909, lr_0 = 1.4329e-04
Loss = 8.1437e-02, PNorm = 84.8626, GNorm = 0.7019, lr_0 = 1.4319e-04
Loss = 6.8949e-02, PNorm = 84.8650, GNorm = 0.6539, lr_0 = 1.4310e-04
Loss = 7.7405e-02, PNorm = 84.8670, GNorm = 0.7017, lr_0 = 1.4300e-04
Loss = 8.0173e-02, PNorm = 84.8693, GNorm = 0.5591, lr_0 = 1.4290e-04
Loss = 7.0691e-02, PNorm = 84.8723, GNorm = 0.6041, lr_0 = 1.4280e-04
Loss = 9.2583e-02, PNorm = 84.8752, GNorm = 0.6643, lr_0 = 1.4270e-04
Loss = 7.9983e-02, PNorm = 84.8781, GNorm = 0.6357, lr_0 = 1.4261e-04
Loss = 7.8951e-02, PNorm = 84.8822, GNorm = 0.6581, lr_0 = 1.4251e-04
Loss = 7.6533e-02, PNorm = 84.8841, GNorm = 0.5023, lr_0 = 1.4241e-04
Loss = 6.6363e-02, PNorm = 84.8858, GNorm = 0.5311, lr_0 = 1.4231e-04
Loss = 8.9507e-02, PNorm = 84.8884, GNorm = 0.5754, lr_0 = 1.4222e-04
Loss = 8.9689e-02, PNorm = 84.8911, GNorm = 0.8475, lr_0 = 1.4212e-04
Loss = 8.3906e-02, PNorm = 84.8915, GNorm = 0.5876, lr_0 = 1.4202e-04
Loss = 8.3272e-02, PNorm = 84.8923, GNorm = 0.6322, lr_0 = 1.4192e-04
Loss = 8.7116e-02, PNorm = 84.8933, GNorm = 0.7145, lr_0 = 1.4183e-04
Loss = 8.1408e-02, PNorm = 84.8931, GNorm = 0.6556, lr_0 = 1.4173e-04
Loss = 1.0823e-01, PNorm = 84.8937, GNorm = 0.7014, lr_0 = 1.4163e-04
Loss = 8.8467e-02, PNorm = 84.8957, GNorm = 0.6057, lr_0 = 1.4153e-04
Loss = 7.8298e-02, PNorm = 84.8996, GNorm = 0.5863, lr_0 = 1.4144e-04
Loss = 7.6743e-02, PNorm = 84.9031, GNorm = 0.7399, lr_0 = 1.4134e-04
Loss = 8.4880e-02, PNorm = 84.9057, GNorm = 0.5925, lr_0 = 1.4124e-04
Loss = 9.1138e-02, PNorm = 84.9070, GNorm = 0.6279, lr_0 = 1.4115e-04
Loss = 7.9792e-02, PNorm = 84.9074, GNorm = 0.8463, lr_0 = 1.4105e-04
Loss = 8.4293e-02, PNorm = 84.9096, GNorm = 0.8352, lr_0 = 1.4095e-04
Loss = 7.5373e-02, PNorm = 84.9118, GNorm = 0.6085, lr_0 = 1.4086e-04
Loss = 8.2704e-02, PNorm = 84.9144, GNorm = 0.6810, lr_0 = 1.4076e-04
Loss = 7.9144e-02, PNorm = 84.9153, GNorm = 0.5083, lr_0 = 1.4066e-04
Loss = 8.9845e-02, PNorm = 84.9172, GNorm = 0.5961, lr_0 = 1.4057e-04
Loss = 7.6184e-02, PNorm = 84.9188, GNorm = 0.8204, lr_0 = 1.4047e-04
Loss = 8.8253e-02, PNorm = 84.9219, GNorm = 0.4708, lr_0 = 1.4038e-04
Loss = 8.4143e-02, PNorm = 84.9252, GNorm = 0.5394, lr_0 = 1.4028e-04
Loss = 9.0383e-02, PNorm = 84.9272, GNorm = 0.8143, lr_0 = 1.4018e-04
Loss = 7.6137e-02, PNorm = 84.9290, GNorm = 0.6607, lr_0 = 1.4009e-04
Loss = 8.9681e-02, PNorm = 84.9319, GNorm = 0.8215, lr_0 = 1.3999e-04
Loss = 8.1348e-02, PNorm = 84.9343, GNorm = 0.6466, lr_0 = 1.3990e-04
Loss = 7.5461e-02, PNorm = 84.9365, GNorm = 0.5399, lr_0 = 1.3980e-04
Loss = 7.6411e-02, PNorm = 84.9382, GNorm = 0.7978, lr_0 = 1.3970e-04
Loss = 7.5381e-02, PNorm = 84.9385, GNorm = 0.6626, lr_0 = 1.3961e-04
Loss = 8.0342e-02, PNorm = 84.9388, GNorm = 0.5791, lr_0 = 1.3951e-04
Loss = 8.6057e-02, PNorm = 84.9417, GNorm = 0.6147, lr_0 = 1.3942e-04
Loss = 8.1718e-02, PNorm = 84.9452, GNorm = 0.6619, lr_0 = 1.3932e-04
Loss = 8.7598e-02, PNorm = 84.9467, GNorm = 0.7445, lr_0 = 1.3923e-04
Loss = 7.9399e-02, PNorm = 84.9498, GNorm = 0.6982, lr_0 = 1.3913e-04
Loss = 7.8220e-02, PNorm = 84.9513, GNorm = 0.5961, lr_0 = 1.3904e-04
Loss = 8.0457e-02, PNorm = 84.9539, GNorm = 0.5654, lr_0 = 1.3894e-04
Validation mae = 0.227010
Epoch 26
Loss = 7.6693e-02, PNorm = 84.9546, GNorm = 0.6659, lr_0 = 1.3884e-04
Loss = 7.6204e-02, PNorm = 84.9564, GNorm = 0.6722, lr_0 = 1.3875e-04
Loss = 7.3621e-02, PNorm = 84.9597, GNorm = 0.5969, lr_0 = 1.3865e-04
Loss = 7.6829e-02, PNorm = 84.9614, GNorm = 0.5317, lr_0 = 1.3856e-04
Loss = 7.7145e-02, PNorm = 84.9617, GNorm = 0.6252, lr_0 = 1.3846e-04
Loss = 8.3759e-02, PNorm = 84.9647, GNorm = 0.8651, lr_0 = 1.3837e-04
Loss = 8.4163e-02, PNorm = 84.9670, GNorm = 0.8559, lr_0 = 1.3828e-04
Loss = 8.9554e-02, PNorm = 84.9685, GNorm = 0.8902, lr_0 = 1.3818e-04
Loss = 7.4822e-02, PNorm = 84.9722, GNorm = 0.5761, lr_0 = 1.3809e-04
Loss = 7.7116e-02, PNorm = 84.9744, GNorm = 0.7815, lr_0 = 1.3799e-04
Loss = 7.5357e-02, PNorm = 84.9749, GNorm = 0.5455, lr_0 = 1.3790e-04
Loss = 7.4540e-02, PNorm = 84.9758, GNorm = 0.7960, lr_0 = 1.3780e-04
Loss = 8.0280e-02, PNorm = 84.9771, GNorm = 0.6797, lr_0 = 1.3771e-04
Loss = 7.6964e-02, PNorm = 84.9796, GNorm = 0.5866, lr_0 = 1.3761e-04
Loss = 8.0846e-02, PNorm = 84.9833, GNorm = 0.9793, lr_0 = 1.3752e-04
Loss = 7.0875e-02, PNorm = 84.9851, GNorm = 0.4117, lr_0 = 1.3742e-04
Loss = 6.9064e-02, PNorm = 84.9858, GNorm = 0.8522, lr_0 = 1.3733e-04
Loss = 8.4814e-02, PNorm = 84.9874, GNorm = 0.6238, lr_0 = 1.3724e-04
Loss = 8.1928e-02, PNorm = 84.9895, GNorm = 0.8546, lr_0 = 1.3714e-04
Loss = 8.1141e-02, PNorm = 84.9914, GNorm = 0.7208, lr_0 = 1.3705e-04
Loss = 8.5787e-02, PNorm = 84.9940, GNorm = 0.7640, lr_0 = 1.3695e-04
Loss = 8.0898e-02, PNorm = 84.9963, GNorm = 0.7129, lr_0 = 1.3686e-04
Loss = 7.1097e-02, PNorm = 84.9960, GNorm = 0.6239, lr_0 = 1.3677e-04
Loss = 7.6491e-02, PNorm = 84.9977, GNorm = 0.4920, lr_0 = 1.3667e-04
Loss = 7.5492e-02, PNorm = 84.9991, GNorm = 0.6231, lr_0 = 1.3658e-04
Loss = 7.9113e-02, PNorm = 85.0008, GNorm = 0.5413, lr_0 = 1.3649e-04
Loss = 7.8693e-02, PNorm = 85.0048, GNorm = 0.7063, lr_0 = 1.3639e-04
Loss = 8.4843e-02, PNorm = 85.0076, GNorm = 0.5444, lr_0 = 1.3630e-04
Loss = 8.0251e-02, PNorm = 85.0099, GNorm = 0.5112, lr_0 = 1.3621e-04
Loss = 8.4240e-02, PNorm = 85.0113, GNorm = 0.6480, lr_0 = 1.3611e-04
Loss = 8.6646e-02, PNorm = 85.0142, GNorm = 0.6178, lr_0 = 1.3602e-04
Loss = 6.9897e-02, PNorm = 85.0181, GNorm = 0.4347, lr_0 = 1.3593e-04
Loss = 7.0964e-02, PNorm = 85.0219, GNorm = 0.5626, lr_0 = 1.3583e-04
Loss = 7.3259e-02, PNorm = 85.0259, GNorm = 0.5917, lr_0 = 1.3574e-04
Loss = 7.1784e-02, PNorm = 85.0283, GNorm = 0.5864, lr_0 = 1.3565e-04
Loss = 8.2313e-02, PNorm = 85.0307, GNorm = 0.5432, lr_0 = 1.3555e-04
Loss = 7.9514e-02, PNorm = 85.0322, GNorm = 0.6539, lr_0 = 1.3546e-04
Loss = 6.9764e-02, PNorm = 85.0338, GNorm = 0.5743, lr_0 = 1.3537e-04
Loss = 8.1610e-02, PNorm = 85.0347, GNorm = 0.5354, lr_0 = 1.3528e-04
Loss = 8.4649e-02, PNorm = 85.0355, GNorm = 0.7816, lr_0 = 1.3518e-04
Loss = 8.9163e-02, PNorm = 85.0355, GNorm = 0.5661, lr_0 = 1.3509e-04
Loss = 7.0537e-02, PNorm = 85.0375, GNorm = 0.5538, lr_0 = 1.3500e-04
Loss = 7.4848e-02, PNorm = 85.0396, GNorm = 0.7959, lr_0 = 1.3491e-04
Loss = 7.8966e-02, PNorm = 85.0386, GNorm = 0.6037, lr_0 = 1.3481e-04
Loss = 7.4521e-02, PNorm = 85.0404, GNorm = 0.6295, lr_0 = 1.3472e-04
Loss = 7.0984e-02, PNorm = 85.0430, GNorm = 0.4861, lr_0 = 1.3463e-04
Loss = 6.9888e-02, PNorm = 85.0449, GNorm = 0.4683, lr_0 = 1.3454e-04
Loss = 6.9797e-02, PNorm = 85.0465, GNorm = 0.5948, lr_0 = 1.3444e-04
Loss = 7.4835e-02, PNorm = 85.0478, GNorm = 0.8232, lr_0 = 1.3435e-04
Loss = 8.5184e-02, PNorm = 85.0490, GNorm = 0.7081, lr_0 = 1.3426e-04
Loss = 8.5554e-02, PNorm = 85.0519, GNorm = 0.8258, lr_0 = 1.3417e-04
Loss = 6.7555e-02, PNorm = 85.0532, GNorm = 0.5178, lr_0 = 1.3408e-04
Loss = 8.6720e-02, PNorm = 85.0547, GNorm = 0.6409, lr_0 = 1.3398e-04
Loss = 6.7520e-02, PNorm = 85.0564, GNorm = 0.4669, lr_0 = 1.3389e-04
Loss = 8.2671e-02, PNorm = 85.0563, GNorm = 0.5755, lr_0 = 1.3380e-04
Loss = 6.7884e-02, PNorm = 85.0569, GNorm = 0.8910, lr_0 = 1.3371e-04
Loss = 7.3686e-02, PNorm = 85.0569, GNorm = 0.6435, lr_0 = 1.3362e-04
Loss = 9.0478e-02, PNorm = 85.0580, GNorm = 0.6783, lr_0 = 1.3353e-04
Loss = 8.9285e-02, PNorm = 85.0601, GNorm = 0.9441, lr_0 = 1.3343e-04
Loss = 8.4089e-02, PNorm = 85.0658, GNorm = 0.8700, lr_0 = 1.3334e-04
Loss = 8.2521e-02, PNorm = 85.0692, GNorm = 0.5489, lr_0 = 1.3325e-04
Loss = 7.9160e-02, PNorm = 85.0718, GNorm = 0.6404, lr_0 = 1.3316e-04
Loss = 7.3353e-02, PNorm = 85.0754, GNorm = 0.5081, lr_0 = 1.3307e-04
Loss = 7.7359e-02, PNorm = 85.0767, GNorm = 0.6610, lr_0 = 1.3298e-04
Loss = 8.3011e-02, PNorm = 85.0793, GNorm = 0.7373, lr_0 = 1.3289e-04
Loss = 8.8368e-02, PNorm = 85.0815, GNorm = 0.6006, lr_0 = 1.3280e-04
Loss = 7.8506e-02, PNorm = 85.0846, GNorm = 0.7193, lr_0 = 1.3270e-04
Loss = 8.2011e-02, PNorm = 85.0865, GNorm = 0.7290, lr_0 = 1.3261e-04
Loss = 6.5893e-02, PNorm = 85.0877, GNorm = 0.5210, lr_0 = 1.3252e-04
Loss = 8.6578e-02, PNorm = 85.0918, GNorm = 0.6560, lr_0 = 1.3243e-04
Loss = 8.3074e-02, PNorm = 85.0946, GNorm = 0.5667, lr_0 = 1.3234e-04
Loss = 8.8414e-02, PNorm = 85.0958, GNorm = 0.7882, lr_0 = 1.3225e-04
Loss = 7.2212e-02, PNorm = 85.0957, GNorm = 0.6298, lr_0 = 1.3216e-04
Loss = 7.7447e-02, PNorm = 85.0973, GNorm = 0.5523, lr_0 = 1.3207e-04
Loss = 7.4080e-02, PNorm = 85.0983, GNorm = 0.6796, lr_0 = 1.3198e-04
Loss = 6.9929e-02, PNorm = 85.0988, GNorm = 0.6937, lr_0 = 1.3189e-04
Loss = 7.5481e-02, PNorm = 85.1009, GNorm = 0.5193, lr_0 = 1.3180e-04
Loss = 7.7119e-02, PNorm = 85.1040, GNorm = 0.5647, lr_0 = 1.3171e-04
Loss = 8.2432e-02, PNorm = 85.1061, GNorm = 0.5189, lr_0 = 1.3162e-04
Loss = 8.5466e-02, PNorm = 85.1075, GNorm = 0.6357, lr_0 = 1.3153e-04
Loss = 8.4127e-02, PNorm = 85.1111, GNorm = 0.7238, lr_0 = 1.3144e-04
Loss = 7.5796e-02, PNorm = 85.1147, GNorm = 0.5770, lr_0 = 1.3135e-04
Loss = 7.4895e-02, PNorm = 85.1160, GNorm = 0.7158, lr_0 = 1.3126e-04
Loss = 8.8375e-02, PNorm = 85.1180, GNorm = 0.7399, lr_0 = 1.3117e-04
Loss = 7.7190e-02, PNorm = 85.1203, GNorm = 0.6309, lr_0 = 1.3108e-04
Loss = 7.4053e-02, PNorm = 85.1231, GNorm = 0.6549, lr_0 = 1.3099e-04
Loss = 6.3425e-02, PNorm = 85.1256, GNorm = 0.3835, lr_0 = 1.3090e-04
Loss = 8.9504e-02, PNorm = 85.1298, GNorm = 0.8319, lr_0 = 1.3081e-04
Loss = 7.6189e-02, PNorm = 85.1322, GNorm = 0.8259, lr_0 = 1.3072e-04
Loss = 8.2752e-02, PNorm = 85.1324, GNorm = 0.7867, lr_0 = 1.3063e-04
Loss = 7.5300e-02, PNorm = 85.1314, GNorm = 0.5222, lr_0 = 1.3054e-04
Loss = 7.8042e-02, PNorm = 85.1315, GNorm = 0.6579, lr_0 = 1.3045e-04
Loss = 7.5631e-02, PNorm = 85.1319, GNorm = 0.4660, lr_0 = 1.3036e-04
Loss = 7.4403e-02, PNorm = 85.1335, GNorm = 0.4308, lr_0 = 1.3027e-04
Loss = 7.4888e-02, PNorm = 85.1349, GNorm = 0.8675, lr_0 = 1.3018e-04
Loss = 7.9716e-02, PNorm = 85.1372, GNorm = 0.6812, lr_0 = 1.3009e-04
Loss = 9.2256e-02, PNorm = 85.1404, GNorm = 0.6185, lr_0 = 1.3000e-04
Loss = 7.7056e-02, PNorm = 85.1440, GNorm = 0.7030, lr_0 = 1.2992e-04
Loss = 7.6764e-02, PNorm = 85.1450, GNorm = 0.8103, lr_0 = 1.2983e-04
Loss = 8.2291e-02, PNorm = 85.1485, GNorm = 0.6241, lr_0 = 1.2974e-04
Loss = 8.4006e-02, PNorm = 85.1522, GNorm = 0.6532, lr_0 = 1.2965e-04
Loss = 6.9972e-02, PNorm = 85.1541, GNorm = 0.4996, lr_0 = 1.2956e-04
Loss = 8.2647e-02, PNorm = 85.1564, GNorm = 0.6853, lr_0 = 1.2947e-04
Loss = 8.2347e-02, PNorm = 85.1582, GNorm = 0.8848, lr_0 = 1.2938e-04
Loss = 7.8992e-02, PNorm = 85.1596, GNorm = 0.6040, lr_0 = 1.2929e-04
Loss = 8.3829e-02, PNorm = 85.1623, GNorm = 0.6779, lr_0 = 1.2921e-04
Loss = 8.8732e-02, PNorm = 85.1658, GNorm = 0.5941, lr_0 = 1.2912e-04
Loss = 8.0239e-02, PNorm = 85.1688, GNorm = 0.6414, lr_0 = 1.2903e-04
Loss = 8.5571e-02, PNorm = 85.1711, GNorm = 0.4873, lr_0 = 1.2894e-04
Loss = 9.0354e-02, PNorm = 85.1720, GNorm = 0.9441, lr_0 = 1.2885e-04
Loss = 7.6756e-02, PNorm = 85.1736, GNorm = 0.7815, lr_0 = 1.2876e-04
Loss = 7.6821e-02, PNorm = 85.1748, GNorm = 0.5048, lr_0 = 1.2867e-04
Loss = 8.2924e-02, PNorm = 85.1765, GNorm = 0.5111, lr_0 = 1.2859e-04
Loss = 8.3573e-02, PNorm = 85.1785, GNorm = 0.6828, lr_0 = 1.2850e-04
Loss = 6.8379e-02, PNorm = 85.1788, GNorm = 0.6150, lr_0 = 1.2841e-04
Loss = 8.0677e-02, PNorm = 85.1807, GNorm = 0.6065, lr_0 = 1.2832e-04
Loss = 8.6362e-02, PNorm = 85.1828, GNorm = 0.6378, lr_0 = 1.2823e-04
Loss = 7.7389e-02, PNorm = 85.1835, GNorm = 0.7481, lr_0 = 1.2815e-04
Loss = 7.3514e-02, PNorm = 85.1832, GNorm = 0.6280, lr_0 = 1.2806e-04
Loss = 8.3842e-02, PNorm = 85.1862, GNorm = 0.8940, lr_0 = 1.2797e-04
Validation mae = 0.233336
Epoch 27
Loss = 7.8829e-02, PNorm = 85.1879, GNorm = 0.5514, lr_0 = 1.2788e-04
Loss = 7.5470e-02, PNorm = 85.1900, GNorm = 0.8644, lr_0 = 1.2780e-04
Loss = 6.8244e-02, PNorm = 85.1930, GNorm = 0.6824, lr_0 = 1.2771e-04
Loss = 7.5854e-02, PNorm = 85.1952, GNorm = 0.6780, lr_0 = 1.2762e-04
Loss = 7.5246e-02, PNorm = 85.1960, GNorm = 0.6109, lr_0 = 1.2753e-04
Loss = 7.9762e-02, PNorm = 85.1968, GNorm = 0.5488, lr_0 = 1.2745e-04
Loss = 6.6585e-02, PNorm = 85.1982, GNorm = 0.6484, lr_0 = 1.2736e-04
Loss = 6.7225e-02, PNorm = 85.1995, GNorm = 0.5033, lr_0 = 1.2727e-04
Loss = 6.7835e-02, PNorm = 85.2009, GNorm = 0.6494, lr_0 = 1.2718e-04
Loss = 7.5888e-02, PNorm = 85.2026, GNorm = 0.7533, lr_0 = 1.2710e-04
Loss = 7.8626e-02, PNorm = 85.2037, GNorm = 0.6628, lr_0 = 1.2701e-04
Loss = 9.4408e-02, PNorm = 85.2069, GNorm = 0.8447, lr_0 = 1.2692e-04
Loss = 7.8259e-02, PNorm = 85.2101, GNorm = 0.4886, lr_0 = 1.2684e-04
Loss = 8.1894e-02, PNorm = 85.2118, GNorm = 0.6064, lr_0 = 1.2675e-04
Loss = 7.5754e-02, PNorm = 85.2142, GNorm = 0.5549, lr_0 = 1.2666e-04
Loss = 8.4852e-02, PNorm = 85.2167, GNorm = 0.6823, lr_0 = 1.2658e-04
Loss = 6.9408e-02, PNorm = 85.2183, GNorm = 0.4574, lr_0 = 1.2649e-04
Loss = 6.8025e-02, PNorm = 85.2199, GNorm = 0.5950, lr_0 = 1.2640e-04
Loss = 8.2629e-02, PNorm = 85.2220, GNorm = 0.6211, lr_0 = 1.2632e-04
Loss = 6.9719e-02, PNorm = 85.2236, GNorm = 0.5809, lr_0 = 1.2623e-04
Loss = 6.6408e-02, PNorm = 85.2253, GNorm = 0.5485, lr_0 = 1.2614e-04
Loss = 6.9566e-02, PNorm = 85.2266, GNorm = 0.5623, lr_0 = 1.2606e-04
Loss = 8.5067e-02, PNorm = 85.2278, GNorm = 0.5648, lr_0 = 1.2597e-04
Loss = 7.8614e-02, PNorm = 85.2314, GNorm = 0.7677, lr_0 = 1.2588e-04
Loss = 7.5238e-02, PNorm = 85.2357, GNorm = 0.5078, lr_0 = 1.2580e-04
Loss = 8.1424e-02, PNorm = 85.2374, GNorm = 0.8200, lr_0 = 1.2571e-04
Loss = 7.1232e-02, PNorm = 85.2396, GNorm = 0.6478, lr_0 = 1.2563e-04
Loss = 8.0572e-02, PNorm = 85.2410, GNorm = 0.7902, lr_0 = 1.2554e-04
Loss = 7.7028e-02, PNorm = 85.2423, GNorm = 0.7179, lr_0 = 1.2545e-04
Loss = 6.5462e-02, PNorm = 85.2434, GNorm = 0.5702, lr_0 = 1.2537e-04
Loss = 8.2140e-02, PNorm = 85.2440, GNorm = 0.8392, lr_0 = 1.2528e-04
Loss = 7.7162e-02, PNorm = 85.2455, GNorm = 0.7621, lr_0 = 1.2520e-04
Loss = 7.2478e-02, PNorm = 85.2476, GNorm = 0.6806, lr_0 = 1.2511e-04
Loss = 7.3813e-02, PNorm = 85.2499, GNorm = 0.5740, lr_0 = 1.2502e-04
Loss = 7.5894e-02, PNorm = 85.2529, GNorm = 0.7000, lr_0 = 1.2494e-04
Loss = 7.8009e-02, PNorm = 85.2558, GNorm = 0.6110, lr_0 = 1.2485e-04
Loss = 8.7801e-02, PNorm = 85.2561, GNorm = 0.6340, lr_0 = 1.2477e-04
Loss = 8.6741e-02, PNorm = 85.2548, GNorm = 0.6059, lr_0 = 1.2468e-04
Loss = 8.4912e-02, PNorm = 85.2561, GNorm = 0.5995, lr_0 = 1.2460e-04
Loss = 7.4300e-02, PNorm = 85.2572, GNorm = 0.4847, lr_0 = 1.2451e-04
Loss = 8.3779e-02, PNorm = 85.2589, GNorm = 0.5148, lr_0 = 1.2443e-04
Loss = 8.3021e-02, PNorm = 85.2602, GNorm = 0.9646, lr_0 = 1.2434e-04
Loss = 7.6621e-02, PNorm = 85.2608, GNorm = 0.4894, lr_0 = 1.2426e-04
Loss = 7.0409e-02, PNorm = 85.2624, GNorm = 0.5087, lr_0 = 1.2417e-04
Loss = 7.7118e-02, PNorm = 85.2631, GNorm = 0.6833, lr_0 = 1.2409e-04
Loss = 9.6157e-02, PNorm = 85.2654, GNorm = 0.5761, lr_0 = 1.2400e-04
Loss = 8.6397e-02, PNorm = 85.2689, GNorm = 0.6900, lr_0 = 1.2392e-04
Loss = 7.5238e-02, PNorm = 85.2711, GNorm = 0.5700, lr_0 = 1.2383e-04
Loss = 7.8406e-02, PNorm = 85.2715, GNorm = 0.7886, lr_0 = 1.2375e-04
Loss = 8.2168e-02, PNorm = 85.2737, GNorm = 0.5807, lr_0 = 1.2366e-04
Loss = 7.7748e-02, PNorm = 85.2767, GNorm = 0.8889, lr_0 = 1.2358e-04
Loss = 7.9847e-02, PNorm = 85.2788, GNorm = 0.8568, lr_0 = 1.2349e-04
Loss = 6.7363e-02, PNorm = 85.2808, GNorm = 0.5557, lr_0 = 1.2341e-04
Loss = 7.0670e-02, PNorm = 85.2813, GNorm = 0.7197, lr_0 = 1.2332e-04
Loss = 7.5416e-02, PNorm = 85.2824, GNorm = 0.5714, lr_0 = 1.2324e-04
Loss = 7.1619e-02, PNorm = 85.2841, GNorm = 0.5451, lr_0 = 1.2315e-04
Loss = 7.6792e-02, PNorm = 85.2855, GNorm = 0.6689, lr_0 = 1.2307e-04
Loss = 7.7076e-02, PNorm = 85.2876, GNorm = 0.7276, lr_0 = 1.2298e-04
Loss = 8.1161e-02, PNorm = 85.2886, GNorm = 0.6479, lr_0 = 1.2290e-04
Loss = 7.5094e-02, PNorm = 85.2890, GNorm = 0.5229, lr_0 = 1.2282e-04
Loss = 6.9776e-02, PNorm = 85.2891, GNorm = 0.4786, lr_0 = 1.2273e-04
Loss = 7.4789e-02, PNorm = 85.2907, GNorm = 0.7082, lr_0 = 1.2265e-04
Loss = 8.7972e-02, PNorm = 85.2932, GNorm = 0.5991, lr_0 = 1.2256e-04
Loss = 7.4771e-02, PNorm = 85.2947, GNorm = 0.8379, lr_0 = 1.2248e-04
Loss = 7.3759e-02, PNorm = 85.2961, GNorm = 0.5903, lr_0 = 1.2240e-04
Loss = 8.4317e-02, PNorm = 85.2978, GNorm = 0.7031, lr_0 = 1.2231e-04
Loss = 7.7260e-02, PNorm = 85.2996, GNorm = 0.6953, lr_0 = 1.2223e-04
Loss = 7.2012e-02, PNorm = 85.3015, GNorm = 0.6134, lr_0 = 1.2214e-04
Loss = 7.4939e-02, PNorm = 85.3037, GNorm = 0.6328, lr_0 = 1.2206e-04
Loss = 8.3135e-02, PNorm = 85.3052, GNorm = 0.6299, lr_0 = 1.2198e-04
Loss = 7.6651e-02, PNorm = 85.3073, GNorm = 0.6242, lr_0 = 1.2189e-04
Loss = 6.9478e-02, PNorm = 85.3097, GNorm = 0.4971, lr_0 = 1.2181e-04
Loss = 8.5071e-02, PNorm = 85.3117, GNorm = 1.0275, lr_0 = 1.2173e-04
Loss = 7.9931e-02, PNorm = 85.3122, GNorm = 0.5130, lr_0 = 1.2164e-04
Loss = 7.9884e-02, PNorm = 85.3134, GNorm = 0.7110, lr_0 = 1.2156e-04
Loss = 7.7002e-02, PNorm = 85.3156, GNorm = 0.5582, lr_0 = 1.2148e-04
Loss = 8.6458e-02, PNorm = 85.3182, GNorm = 0.7398, lr_0 = 1.2139e-04
Loss = 8.6599e-02, PNorm = 85.3210, GNorm = 0.6389, lr_0 = 1.2131e-04
Loss = 6.7927e-02, PNorm = 85.3231, GNorm = 0.8030, lr_0 = 1.2123e-04
Loss = 8.1750e-02, PNorm = 85.3247, GNorm = 0.7561, lr_0 = 1.2114e-04
Loss = 8.2980e-02, PNorm = 85.3247, GNorm = 0.7112, lr_0 = 1.2106e-04
Loss = 7.9623e-02, PNorm = 85.3275, GNorm = 0.6512, lr_0 = 1.2098e-04
Loss = 7.5139e-02, PNorm = 85.3307, GNorm = 0.7555, lr_0 = 1.2090e-04
Loss = 7.8657e-02, PNorm = 85.3318, GNorm = 0.4843, lr_0 = 1.2081e-04
Loss = 9.1793e-02, PNorm = 85.3353, GNorm = 0.7367, lr_0 = 1.2073e-04
Loss = 7.7640e-02, PNorm = 85.3388, GNorm = 0.6146, lr_0 = 1.2065e-04
Loss = 8.8229e-02, PNorm = 85.3413, GNorm = 0.5763, lr_0 = 1.2056e-04
Loss = 8.5877e-02, PNorm = 85.3424, GNorm = 0.7324, lr_0 = 1.2048e-04
Loss = 7.9686e-02, PNorm = 85.3442, GNorm = 0.5964, lr_0 = 1.2040e-04
Loss = 6.8529e-02, PNorm = 85.3464, GNorm = 0.6051, lr_0 = 1.2032e-04
Loss = 8.4134e-02, PNorm = 85.3484, GNorm = 0.6784, lr_0 = 1.2023e-04
Loss = 7.4719e-02, PNorm = 85.3520, GNorm = 0.5980, lr_0 = 1.2015e-04
Loss = 8.5056e-02, PNorm = 85.3526, GNorm = 0.7004, lr_0 = 1.2007e-04
Loss = 8.4172e-02, PNorm = 85.3558, GNorm = 0.6538, lr_0 = 1.1999e-04
Loss = 8.2892e-02, PNorm = 85.3580, GNorm = 0.8117, lr_0 = 1.1991e-04
Loss = 7.5507e-02, PNorm = 85.3580, GNorm = 0.5498, lr_0 = 1.1982e-04
Loss = 8.0673e-02, PNorm = 85.3590, GNorm = 0.5305, lr_0 = 1.1974e-04
Loss = 7.8755e-02, PNorm = 85.3604, GNorm = 0.5231, lr_0 = 1.1966e-04
Loss = 6.9457e-02, PNorm = 85.3615, GNorm = 0.6229, lr_0 = 1.1958e-04
Loss = 7.5854e-02, PNorm = 85.3638, GNorm = 0.6352, lr_0 = 1.1950e-04
Loss = 8.8699e-02, PNorm = 85.3658, GNorm = 0.9281, lr_0 = 1.1941e-04
Loss = 7.8676e-02, PNorm = 85.3668, GNorm = 0.6501, lr_0 = 1.1933e-04
Loss = 7.4666e-02, PNorm = 85.3684, GNorm = 0.6327, lr_0 = 1.1925e-04
Loss = 8.4752e-02, PNorm = 85.3688, GNorm = 0.8829, lr_0 = 1.1917e-04
Loss = 8.6004e-02, PNorm = 85.3703, GNorm = 0.5521, lr_0 = 1.1909e-04
Loss = 7.4551e-02, PNorm = 85.3738, GNorm = 0.6072, lr_0 = 1.1901e-04
Loss = 8.2083e-02, PNorm = 85.3774, GNorm = 0.6140, lr_0 = 1.1892e-04
Loss = 7.3104e-02, PNorm = 85.3789, GNorm = 0.5942, lr_0 = 1.1884e-04
Loss = 8.4932e-02, PNorm = 85.3787, GNorm = 0.5455, lr_0 = 1.1876e-04
Loss = 7.5861e-02, PNorm = 85.3798, GNorm = 0.5045, lr_0 = 1.1868e-04
Loss = 7.3037e-02, PNorm = 85.3810, GNorm = 0.6846, lr_0 = 1.1860e-04
Loss = 7.7066e-02, PNorm = 85.3814, GNorm = 0.6493, lr_0 = 1.1852e-04
Loss = 8.0463e-02, PNorm = 85.3804, GNorm = 0.6848, lr_0 = 1.1844e-04
Loss = 7.6513e-02, PNorm = 85.3808, GNorm = 0.6531, lr_0 = 1.1835e-04
Loss = 7.6167e-02, PNorm = 85.3829, GNorm = 0.8135, lr_0 = 1.1827e-04
Loss = 7.8465e-02, PNorm = 85.3844, GNorm = 0.6829, lr_0 = 1.1819e-04
Loss = 7.8789e-02, PNorm = 85.3857, GNorm = 0.8277, lr_0 = 1.1811e-04
Loss = 7.9241e-02, PNorm = 85.3861, GNorm = 0.7944, lr_0 = 1.1803e-04
Loss = 7.3795e-02, PNorm = 85.3883, GNorm = 0.6798, lr_0 = 1.1795e-04
Loss = 7.8100e-02, PNorm = 85.3907, GNorm = 0.5839, lr_0 = 1.1787e-04
Validation mae = 0.230407
Epoch 28
Loss = 9.0545e-02, PNorm = 85.3924, GNorm = 0.7032, lr_0 = 1.1779e-04
Loss = 7.1447e-02, PNorm = 85.3955, GNorm = 0.6105, lr_0 = 1.1771e-04
Loss = 6.0987e-02, PNorm = 85.3984, GNorm = 0.4595, lr_0 = 1.1763e-04
Loss = 7.9914e-02, PNorm = 85.4003, GNorm = 0.5907, lr_0 = 1.1755e-04
Loss = 7.1082e-02, PNorm = 85.4023, GNorm = 0.6686, lr_0 = 1.1747e-04
Loss = 7.3405e-02, PNorm = 85.4030, GNorm = 0.5609, lr_0 = 1.1739e-04
Loss = 8.3230e-02, PNorm = 85.4048, GNorm = 0.8025, lr_0 = 1.1730e-04
Loss = 7.3304e-02, PNorm = 85.4061, GNorm = 0.6951, lr_0 = 1.1722e-04
Loss = 6.9040e-02, PNorm = 85.4076, GNorm = 0.5776, lr_0 = 1.1714e-04
Loss = 7.0788e-02, PNorm = 85.4107, GNorm = 0.6450, lr_0 = 1.1706e-04
Loss = 7.6154e-02, PNorm = 85.4125, GNorm = 0.7600, lr_0 = 1.1698e-04
Loss = 7.0504e-02, PNorm = 85.4147, GNorm = 0.4875, lr_0 = 1.1690e-04
Loss = 7.3388e-02, PNorm = 85.4178, GNorm = 0.4850, lr_0 = 1.1682e-04
Loss = 7.5928e-02, PNorm = 85.4189, GNorm = 0.6977, lr_0 = 1.1674e-04
Loss = 6.4771e-02, PNorm = 85.4197, GNorm = 0.5065, lr_0 = 1.1666e-04
Loss = 6.5329e-02, PNorm = 85.4220, GNorm = 0.5074, lr_0 = 1.1658e-04
Loss = 7.6551e-02, PNorm = 85.4245, GNorm = 0.5860, lr_0 = 1.1650e-04
Loss = 6.8016e-02, PNorm = 85.4252, GNorm = 0.6424, lr_0 = 1.1642e-04
Loss = 6.5070e-02, PNorm = 85.4263, GNorm = 0.5426, lr_0 = 1.1634e-04
Loss = 8.8962e-02, PNorm = 85.4273, GNorm = 0.6882, lr_0 = 1.1626e-04
Loss = 7.7404e-02, PNorm = 85.4279, GNorm = 0.6507, lr_0 = 1.1618e-04
Loss = 6.4715e-02, PNorm = 85.4290, GNorm = 0.5645, lr_0 = 1.1611e-04
Loss = 8.3408e-02, PNorm = 85.4309, GNorm = 0.7211, lr_0 = 1.1603e-04
Loss = 9.4865e-02, PNorm = 85.4312, GNorm = 0.8520, lr_0 = 1.1595e-04
Loss = 6.4621e-02, PNorm = 85.4326, GNorm = 0.7094, lr_0 = 1.1587e-04
Loss = 8.3081e-02, PNorm = 85.4347, GNorm = 0.6212, lr_0 = 1.1579e-04
Loss = 6.6047e-02, PNorm = 85.4364, GNorm = 0.8610, lr_0 = 1.1571e-04
Loss = 8.5705e-02, PNorm = 85.4389, GNorm = 0.5843, lr_0 = 1.1563e-04
Loss = 7.2581e-02, PNorm = 85.4416, GNorm = 0.7395, lr_0 = 1.1555e-04
Loss = 6.4811e-02, PNorm = 85.4415, GNorm = 0.6626, lr_0 = 1.1547e-04
Loss = 7.3165e-02, PNorm = 85.4420, GNorm = 0.7485, lr_0 = 1.1539e-04
Loss = 7.9989e-02, PNorm = 85.4433, GNorm = 0.9150, lr_0 = 1.1531e-04
Loss = 7.1583e-02, PNorm = 85.4437, GNorm = 0.8425, lr_0 = 1.1523e-04
Loss = 8.3485e-02, PNorm = 85.4444, GNorm = 0.6492, lr_0 = 1.1515e-04
Loss = 8.0441e-02, PNorm = 85.4466, GNorm = 0.4775, lr_0 = 1.1508e-04
Loss = 8.0141e-02, PNorm = 85.4484, GNorm = 0.5520, lr_0 = 1.1500e-04
Loss = 7.6095e-02, PNorm = 85.4480, GNorm = 0.5397, lr_0 = 1.1492e-04
Loss = 7.8658e-02, PNorm = 85.4489, GNorm = 0.7526, lr_0 = 1.1484e-04
Loss = 6.7460e-02, PNorm = 85.4505, GNorm = 0.6344, lr_0 = 1.1476e-04
Loss = 7.9629e-02, PNorm = 85.4527, GNorm = 0.6926, lr_0 = 1.1468e-04
Loss = 7.5138e-02, PNorm = 85.4542, GNorm = 0.4897, lr_0 = 1.1460e-04
Loss = 7.5456e-02, PNorm = 85.4550, GNorm = 0.5372, lr_0 = 1.1452e-04
Loss = 7.4465e-02, PNorm = 85.4566, GNorm = 0.5991, lr_0 = 1.1445e-04
Loss = 8.9482e-02, PNorm = 85.4589, GNorm = 0.9800, lr_0 = 1.1437e-04
Loss = 8.4850e-02, PNorm = 85.4619, GNorm = 0.6810, lr_0 = 1.1429e-04
Loss = 7.0068e-02, PNorm = 85.4632, GNorm = 0.6415, lr_0 = 1.1421e-04
Loss = 6.9203e-02, PNorm = 85.4640, GNorm = 0.5839, lr_0 = 1.1413e-04
Loss = 8.1732e-02, PNorm = 85.4665, GNorm = 0.7725, lr_0 = 1.1405e-04
Loss = 7.9224e-02, PNorm = 85.4681, GNorm = 0.6035, lr_0 = 1.1398e-04
Loss = 7.8482e-02, PNorm = 85.4682, GNorm = 0.7881, lr_0 = 1.1390e-04
Loss = 8.7037e-02, PNorm = 85.4709, GNorm = 0.6587, lr_0 = 1.1382e-04
Loss = 6.7736e-02, PNorm = 85.4733, GNorm = 0.5227, lr_0 = 1.1374e-04
Loss = 8.1713e-02, PNorm = 85.4752, GNorm = 0.6878, lr_0 = 1.1366e-04
Loss = 7.4235e-02, PNorm = 85.4765, GNorm = 0.7539, lr_0 = 1.1359e-04
Loss = 7.5790e-02, PNorm = 85.4768, GNorm = 0.5384, lr_0 = 1.1351e-04
Loss = 7.8017e-02, PNorm = 85.4778, GNorm = 0.6859, lr_0 = 1.1343e-04
Loss = 8.9517e-02, PNorm = 85.4801, GNorm = 0.5131, lr_0 = 1.1335e-04
Loss = 6.7332e-02, PNorm = 85.4820, GNorm = 0.6341, lr_0 = 1.1328e-04
Loss = 7.8734e-02, PNorm = 85.4831, GNorm = 0.7418, lr_0 = 1.1320e-04
Loss = 7.9309e-02, PNorm = 85.4843, GNorm = 0.7944, lr_0 = 1.1312e-04
Loss = 6.9492e-02, PNorm = 85.4873, GNorm = 0.5151, lr_0 = 1.1304e-04
Loss = 7.8739e-02, PNorm = 85.4870, GNorm = 0.6269, lr_0 = 1.1297e-04
Loss = 7.9732e-02, PNorm = 85.4880, GNorm = 0.5286, lr_0 = 1.1289e-04
Loss = 7.1971e-02, PNorm = 85.4902, GNorm = 0.6100, lr_0 = 1.1281e-04
Loss = 7.5262e-02, PNorm = 85.4927, GNorm = 0.5915, lr_0 = 1.1273e-04
Loss = 7.8354e-02, PNorm = 85.4932, GNorm = 0.6859, lr_0 = 1.1266e-04
Loss = 6.6001e-02, PNorm = 85.4942, GNorm = 0.6520, lr_0 = 1.1258e-04
Loss = 8.0067e-02, PNorm = 85.4959, GNorm = 0.6947, lr_0 = 1.1250e-04
Loss = 7.7680e-02, PNorm = 85.4980, GNorm = 0.7030, lr_0 = 1.1243e-04
Loss = 7.7024e-02, PNorm = 85.4983, GNorm = 0.5844, lr_0 = 1.1235e-04
Loss = 8.6308e-02, PNorm = 85.4999, GNorm = 0.5796, lr_0 = 1.1227e-04
Loss = 6.6939e-02, PNorm = 85.5018, GNorm = 0.5379, lr_0 = 1.1219e-04
Loss = 8.0335e-02, PNorm = 85.5019, GNorm = 0.5945, lr_0 = 1.1212e-04
Loss = 7.9388e-02, PNorm = 85.5025, GNorm = 0.5007, lr_0 = 1.1204e-04
Loss = 7.0527e-02, PNorm = 85.5039, GNorm = 0.6508, lr_0 = 1.1196e-04
Loss = 7.8867e-02, PNorm = 85.5052, GNorm = 0.5336, lr_0 = 1.1189e-04
Loss = 8.3564e-02, PNorm = 85.5082, GNorm = 0.6318, lr_0 = 1.1181e-04
Loss = 6.6062e-02, PNorm = 85.5109, GNorm = 0.5837, lr_0 = 1.1173e-04
Loss = 7.0114e-02, PNorm = 85.5117, GNorm = 0.7608, lr_0 = 1.1166e-04
Loss = 6.8795e-02, PNorm = 85.5130, GNorm = 0.7444, lr_0 = 1.1158e-04
Loss = 7.4954e-02, PNorm = 85.5151, GNorm = 0.9176, lr_0 = 1.1150e-04
Loss = 7.0693e-02, PNorm = 85.5167, GNorm = 0.4191, lr_0 = 1.1143e-04
Loss = 6.7577e-02, PNorm = 85.5183, GNorm = 0.5584, lr_0 = 1.1135e-04
Loss = 7.9013e-02, PNorm = 85.5191, GNorm = 0.8000, lr_0 = 1.1128e-04
Loss = 7.1078e-02, PNorm = 85.5204, GNorm = 0.6343, lr_0 = 1.1120e-04
Loss = 8.3654e-02, PNorm = 85.5224, GNorm = 0.6840, lr_0 = 1.1112e-04
Loss = 8.0166e-02, PNorm = 85.5253, GNorm = 0.8553, lr_0 = 1.1105e-04
Loss = 8.2242e-02, PNorm = 85.5274, GNorm = 0.6632, lr_0 = 1.1097e-04
Loss = 7.3217e-02, PNorm = 85.5274, GNorm = 0.5344, lr_0 = 1.1089e-04
Loss = 7.8034e-02, PNorm = 85.5271, GNorm = 0.4546, lr_0 = 1.1082e-04
Loss = 8.4892e-02, PNorm = 85.5285, GNorm = 0.8587, lr_0 = 1.1074e-04
Loss = 7.3303e-02, PNorm = 85.5308, GNorm = 0.6709, lr_0 = 1.1067e-04
Loss = 7.6487e-02, PNorm = 85.5331, GNorm = 0.5795, lr_0 = 1.1059e-04
Loss = 8.0905e-02, PNorm = 85.5353, GNorm = 0.7013, lr_0 = 1.1052e-04
Loss = 6.9446e-02, PNorm = 85.5380, GNorm = 0.6802, lr_0 = 1.1044e-04
Loss = 7.9704e-02, PNorm = 85.5415, GNorm = 0.8594, lr_0 = 1.1036e-04
Loss = 8.1066e-02, PNorm = 85.5442, GNorm = 0.6949, lr_0 = 1.1029e-04
Loss = 7.1974e-02, PNorm = 85.5460, GNorm = 0.5171, lr_0 = 1.1021e-04
Loss = 8.4587e-02, PNorm = 85.5467, GNorm = 0.5320, lr_0 = 1.1014e-04
Loss = 8.2425e-02, PNorm = 85.5467, GNorm = 0.6201, lr_0 = 1.1006e-04
Loss = 7.4291e-02, PNorm = 85.5464, GNorm = 0.5571, lr_0 = 1.0999e-04
Loss = 8.0445e-02, PNorm = 85.5463, GNorm = 0.7588, lr_0 = 1.0991e-04
Loss = 6.7938e-02, PNorm = 85.5475, GNorm = 0.6113, lr_0 = 1.0984e-04
Loss = 8.1112e-02, PNorm = 85.5479, GNorm = 0.7415, lr_0 = 1.0976e-04
Loss = 7.8689e-02, PNorm = 85.5489, GNorm = 0.6569, lr_0 = 1.0969e-04
Loss = 7.0172e-02, PNorm = 85.5516, GNorm = 0.9066, lr_0 = 1.0961e-04
Loss = 7.5039e-02, PNorm = 85.5539, GNorm = 0.8174, lr_0 = 1.0954e-04
Loss = 6.7658e-02, PNorm = 85.5576, GNorm = 0.5509, lr_0 = 1.0946e-04
Loss = 7.4584e-02, PNorm = 85.5587, GNorm = 0.5121, lr_0 = 1.0939e-04
Loss = 8.8325e-02, PNorm = 85.5598, GNorm = 0.6460, lr_0 = 1.0931e-04
Loss = 7.8418e-02, PNorm = 85.5604, GNorm = 0.7216, lr_0 = 1.0924e-04
Loss = 7.6760e-02, PNorm = 85.5609, GNorm = 0.4493, lr_0 = 1.0916e-04
Loss = 7.4376e-02, PNorm = 85.5638, GNorm = 0.4964, lr_0 = 1.0909e-04
Loss = 7.7229e-02, PNorm = 85.5646, GNorm = 0.8045, lr_0 = 1.0901e-04
Loss = 8.8097e-02, PNorm = 85.5653, GNorm = 0.5864, lr_0 = 1.0894e-04
Loss = 7.5596e-02, PNorm = 85.5671, GNorm = 0.5773, lr_0 = 1.0886e-04
Loss = 7.3152e-02, PNorm = 85.5694, GNorm = 0.5390, lr_0 = 1.0879e-04
Loss = 7.4963e-02, PNorm = 85.5693, GNorm = 0.9960, lr_0 = 1.0871e-04
Loss = 7.9077e-02, PNorm = 85.5698, GNorm = 0.5204, lr_0 = 1.0864e-04
Loss = 7.8131e-02, PNorm = 85.5717, GNorm = 0.6086, lr_0 = 1.0856e-04
Validation mae = 0.225736
Epoch 29
Loss = 7.1494e-02, PNorm = 85.5736, GNorm = 0.5832, lr_0 = 1.0849e-04
Loss = 7.2081e-02, PNorm = 85.5751, GNorm = 0.6469, lr_0 = 1.0841e-04
Loss = 6.1431e-02, PNorm = 85.5767, GNorm = 0.5654, lr_0 = 1.0834e-04
Loss = 6.5848e-02, PNorm = 85.5780, GNorm = 0.4940, lr_0 = 1.0827e-04
Loss = 8.3735e-02, PNorm = 85.5798, GNorm = 0.9610, lr_0 = 1.0819e-04
Loss = 6.8463e-02, PNorm = 85.5807, GNorm = 0.5402, lr_0 = 1.0812e-04
Loss = 7.5017e-02, PNorm = 85.5827, GNorm = 0.5952, lr_0 = 1.0804e-04
Loss = 7.3386e-02, PNorm = 85.5838, GNorm = 0.6632, lr_0 = 1.0797e-04
Loss = 7.5684e-02, PNorm = 85.5849, GNorm = 0.7322, lr_0 = 1.0790e-04
Loss = 7.7920e-02, PNorm = 85.5870, GNorm = 0.7922, lr_0 = 1.0782e-04
Loss = 7.3135e-02, PNorm = 85.5898, GNorm = 0.6108, lr_0 = 1.0775e-04
Loss = 7.7483e-02, PNorm = 85.5924, GNorm = 0.5644, lr_0 = 1.0767e-04
Loss = 6.5881e-02, PNorm = 85.5947, GNorm = 0.6654, lr_0 = 1.0760e-04
Loss = 7.7937e-02, PNorm = 85.5963, GNorm = 0.5756, lr_0 = 1.0753e-04
Loss = 8.5527e-02, PNorm = 85.5978, GNorm = 0.7963, lr_0 = 1.0745e-04
Loss = 7.6599e-02, PNorm = 85.5996, GNorm = 0.6690, lr_0 = 1.0738e-04
Loss = 7.4090e-02, PNorm = 85.6025, GNorm = 0.6231, lr_0 = 1.0731e-04
Loss = 8.6745e-02, PNorm = 85.6041, GNorm = 0.7125, lr_0 = 1.0723e-04
Loss = 6.8824e-02, PNorm = 85.6059, GNorm = 0.5882, lr_0 = 1.0716e-04
Loss = 7.1806e-02, PNorm = 85.6066, GNorm = 0.6489, lr_0 = 1.0709e-04
Loss = 6.4615e-02, PNorm = 85.6076, GNorm = 0.5591, lr_0 = 1.0701e-04
Loss = 7.4847e-02, PNorm = 85.6091, GNorm = 0.7122, lr_0 = 1.0694e-04
Loss = 7.9176e-02, PNorm = 85.6119, GNorm = 0.6173, lr_0 = 1.0687e-04
Loss = 7.1023e-02, PNorm = 85.6132, GNorm = 0.5143, lr_0 = 1.0679e-04
Loss = 7.1333e-02, PNorm = 85.6132, GNorm = 0.5173, lr_0 = 1.0672e-04
Loss = 7.6676e-02, PNorm = 85.6143, GNorm = 0.6480, lr_0 = 1.0665e-04
Loss = 7.4928e-02, PNorm = 85.6173, GNorm = 0.6032, lr_0 = 1.0657e-04
Loss = 7.8553e-02, PNorm = 85.6198, GNorm = 0.6240, lr_0 = 1.0650e-04
Loss = 8.0774e-02, PNorm = 85.6202, GNorm = 0.7703, lr_0 = 1.0643e-04
Loss = 8.7104e-02, PNorm = 85.6213, GNorm = 0.7625, lr_0 = 1.0635e-04
Loss = 6.9320e-02, PNorm = 85.6231, GNorm = 0.5288, lr_0 = 1.0628e-04
Loss = 7.6057e-02, PNorm = 85.6244, GNorm = 0.6157, lr_0 = 1.0621e-04
Loss = 6.7827e-02, PNorm = 85.6238, GNorm = 0.6818, lr_0 = 1.0614e-04
Loss = 7.4369e-02, PNorm = 85.6255, GNorm = 0.7429, lr_0 = 1.0606e-04
Loss = 6.9505e-02, PNorm = 85.6275, GNorm = 0.9532, lr_0 = 1.0599e-04
Loss = 7.4368e-02, PNorm = 85.6292, GNorm = 0.7579, lr_0 = 1.0592e-04
Loss = 7.6637e-02, PNorm = 85.6317, GNorm = 0.6225, lr_0 = 1.0585e-04
Loss = 8.2200e-02, PNorm = 85.6359, GNorm = 0.6170, lr_0 = 1.0577e-04
Loss = 7.7261e-02, PNorm = 85.6379, GNorm = 0.7863, lr_0 = 1.0570e-04
Loss = 8.1289e-02, PNorm = 85.6393, GNorm = 0.6931, lr_0 = 1.0563e-04
Loss = 6.8214e-02, PNorm = 85.6400, GNorm = 0.5642, lr_0 = 1.0556e-04
Loss = 7.9903e-02, PNorm = 85.6404, GNorm = 0.6169, lr_0 = 1.0548e-04
Loss = 7.2306e-02, PNorm = 85.6412, GNorm = 0.7950, lr_0 = 1.0541e-04
Loss = 7.6628e-02, PNorm = 85.6429, GNorm = 0.7224, lr_0 = 1.0534e-04
Loss = 8.0990e-02, PNorm = 85.6436, GNorm = 0.7826, lr_0 = 1.0527e-04
Loss = 7.0948e-02, PNorm = 85.6437, GNorm = 0.5608, lr_0 = 1.0519e-04
Loss = 7.4666e-02, PNorm = 85.6463, GNorm = 0.6570, lr_0 = 1.0512e-04
Loss = 7.8166e-02, PNorm = 85.6463, GNorm = 0.5699, lr_0 = 1.0505e-04
Loss = 7.9403e-02, PNorm = 85.6468, GNorm = 0.8030, lr_0 = 1.0498e-04
Loss = 8.2459e-02, PNorm = 85.6487, GNorm = 0.7221, lr_0 = 1.0491e-04
Loss = 7.9164e-02, PNorm = 85.6515, GNorm = 0.7882, lr_0 = 1.0483e-04
Loss = 6.9753e-02, PNorm = 85.6547, GNorm = 0.7829, lr_0 = 1.0476e-04
Loss = 7.3882e-02, PNorm = 85.6562, GNorm = 0.7273, lr_0 = 1.0469e-04
Loss = 7.0282e-02, PNorm = 85.6578, GNorm = 0.6511, lr_0 = 1.0462e-04
Loss = 7.9627e-02, PNorm = 85.6608, GNorm = 0.9599, lr_0 = 1.0455e-04
Loss = 7.0512e-02, PNorm = 85.6629, GNorm = 0.5913, lr_0 = 1.0448e-04
Loss = 8.3773e-02, PNorm = 85.6649, GNorm = 0.6308, lr_0 = 1.0440e-04
Loss = 8.0276e-02, PNorm = 85.6660, GNorm = 0.5407, lr_0 = 1.0433e-04
Loss = 7.8818e-02, PNorm = 85.6670, GNorm = 0.7098, lr_0 = 1.0426e-04
Loss = 6.9516e-02, PNorm = 85.6675, GNorm = 0.6366, lr_0 = 1.0419e-04
Loss = 7.2856e-02, PNorm = 85.6683, GNorm = 0.7151, lr_0 = 1.0412e-04
Loss = 7.5597e-02, PNorm = 85.6707, GNorm = 0.6392, lr_0 = 1.0405e-04
Loss = 7.0816e-02, PNorm = 85.6721, GNorm = 0.5561, lr_0 = 1.0398e-04
Loss = 8.7206e-02, PNorm = 85.6731, GNorm = 0.8510, lr_0 = 1.0391e-04
Loss = 8.3136e-02, PNorm = 85.6729, GNorm = 0.6302, lr_0 = 1.0383e-04
Loss = 7.0616e-02, PNorm = 85.6730, GNorm = 0.7475, lr_0 = 1.0376e-04
Loss = 7.4306e-02, PNorm = 85.6735, GNorm = 0.6071, lr_0 = 1.0369e-04
Loss = 8.7895e-02, PNorm = 85.6752, GNorm = 0.7458, lr_0 = 1.0362e-04
Loss = 6.8031e-02, PNorm = 85.6769, GNorm = 0.6376, lr_0 = 1.0355e-04
Loss = 8.1245e-02, PNorm = 85.6799, GNorm = 0.6671, lr_0 = 1.0348e-04
Loss = 7.2795e-02, PNorm = 85.6805, GNorm = 0.6845, lr_0 = 1.0341e-04
Loss = 6.5136e-02, PNorm = 85.6815, GNorm = 0.5566, lr_0 = 1.0334e-04
Loss = 7.8464e-02, PNorm = 85.6839, GNorm = 0.6729, lr_0 = 1.0327e-04
Loss = 7.6026e-02, PNorm = 85.6842, GNorm = 0.5923, lr_0 = 1.0320e-04
Loss = 7.9332e-02, PNorm = 85.6860, GNorm = 0.6671, lr_0 = 1.0312e-04
Loss = 8.2229e-02, PNorm = 85.6867, GNorm = 0.5798, lr_0 = 1.0305e-04
Loss = 6.5860e-02, PNorm = 85.6887, GNorm = 0.5685, lr_0 = 1.0298e-04
Loss = 7.4395e-02, PNorm = 85.6907, GNorm = 0.6971, lr_0 = 1.0291e-04
Loss = 8.0729e-02, PNorm = 85.6925, GNorm = 0.6954, lr_0 = 1.0284e-04
Loss = 7.4417e-02, PNorm = 85.6943, GNorm = 0.7371, lr_0 = 1.0277e-04
Loss = 7.4130e-02, PNorm = 85.6946, GNorm = 0.5573, lr_0 = 1.0270e-04
Loss = 7.6346e-02, PNorm = 85.6942, GNorm = 0.5604, lr_0 = 1.0263e-04
Loss = 6.9203e-02, PNorm = 85.6955, GNorm = 0.6253, lr_0 = 1.0256e-04
Loss = 7.5785e-02, PNorm = 85.6964, GNorm = 0.5284, lr_0 = 1.0249e-04
Loss = 8.1164e-02, PNorm = 85.6975, GNorm = 0.5575, lr_0 = 1.0242e-04
Loss = 7.1944e-02, PNorm = 85.6980, GNorm = 0.6669, lr_0 = 1.0235e-04
Loss = 7.3244e-02, PNorm = 85.6988, GNorm = 0.6052, lr_0 = 1.0228e-04
Loss = 7.4108e-02, PNorm = 85.6984, GNorm = 0.8466, lr_0 = 1.0221e-04
Loss = 6.8978e-02, PNorm = 85.6979, GNorm = 0.5588, lr_0 = 1.0214e-04
Loss = 7.5160e-02, PNorm = 85.6990, GNorm = 0.5149, lr_0 = 1.0207e-04
Loss = 7.4552e-02, PNorm = 85.7010, GNorm = 0.6212, lr_0 = 1.0200e-04
Loss = 8.2247e-02, PNorm = 85.7040, GNorm = 0.7746, lr_0 = 1.0193e-04
Loss = 7.2791e-02, PNorm = 85.7067, GNorm = 0.6427, lr_0 = 1.0186e-04
Loss = 7.7571e-02, PNorm = 85.7086, GNorm = 0.5712, lr_0 = 1.0179e-04
Loss = 7.0052e-02, PNorm = 85.7106, GNorm = 0.5428, lr_0 = 1.0172e-04
Loss = 7.9351e-02, PNorm = 85.7118, GNorm = 0.8301, lr_0 = 1.0165e-04
Loss = 6.8155e-02, PNorm = 85.7132, GNorm = 0.5358, lr_0 = 1.0158e-04
Loss = 8.0074e-02, PNorm = 85.7139, GNorm = 0.6890, lr_0 = 1.0151e-04
Loss = 7.4738e-02, PNorm = 85.7146, GNorm = 0.5703, lr_0 = 1.0144e-04
Loss = 7.8659e-02, PNorm = 85.7158, GNorm = 0.6073, lr_0 = 1.0137e-04
Loss = 8.2220e-02, PNorm = 85.7170, GNorm = 0.7972, lr_0 = 1.0130e-04
Loss = 7.1917e-02, PNorm = 85.7190, GNorm = 0.5599, lr_0 = 1.0123e-04
Loss = 7.3660e-02, PNorm = 85.7203, GNorm = 0.5423, lr_0 = 1.0116e-04
Loss = 8.0011e-02, PNorm = 85.7213, GNorm = 0.6414, lr_0 = 1.0110e-04
Loss = 7.5693e-02, PNorm = 85.7227, GNorm = 0.8161, lr_0 = 1.0103e-04
Loss = 8.4639e-02, PNorm = 85.7249, GNorm = 0.5181, lr_0 = 1.0096e-04
Loss = 7.9797e-02, PNorm = 85.7261, GNorm = 0.7273, lr_0 = 1.0089e-04
Loss = 7.2224e-02, PNorm = 85.7268, GNorm = 0.5854, lr_0 = 1.0082e-04
Loss = 7.6088e-02, PNorm = 85.7268, GNorm = 0.5331, lr_0 = 1.0075e-04
Loss = 7.2868e-02, PNorm = 85.7292, GNorm = 0.5241, lr_0 = 1.0068e-04
Loss = 6.9069e-02, PNorm = 85.7319, GNorm = 0.5137, lr_0 = 1.0061e-04
Loss = 7.4682e-02, PNorm = 85.7333, GNorm = 0.6161, lr_0 = 1.0054e-04
Loss = 7.6206e-02, PNorm = 85.7341, GNorm = 0.7297, lr_0 = 1.0047e-04
Loss = 6.6372e-02, PNorm = 85.7348, GNorm = 0.6298, lr_0 = 1.0041e-04
Loss = 7.7860e-02, PNorm = 85.7349, GNorm = 0.6420, lr_0 = 1.0034e-04
Loss = 7.1772e-02, PNorm = 85.7365, GNorm = 0.6446, lr_0 = 1.0027e-04
Loss = 7.6975e-02, PNorm = 85.7391, GNorm = 0.5374, lr_0 = 1.0020e-04
Loss = 7.0389e-02, PNorm = 85.7409, GNorm = 0.6326, lr_0 = 1.0013e-04
Loss = 7.2645e-02, PNorm = 85.7420, GNorm = 0.6972, lr_0 = 1.0006e-04
Loss = 8.1433e-02, PNorm = 85.7424, GNorm = 1.0160, lr_0 = 1.0000e-04
Validation mae = 0.227947
Model 0 best validation mae = 0.225736 on epoch 28
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.225747
Ensemble test mae = 0.225747
10-fold cross validation
	Seed 0 ==> test mae = 0.225468
	Seed 1 ==> test mae = 0.225504
	Seed 2 ==> test mae = 0.226333
	Seed 3 ==> test mae = 0.225760
	Seed 4 ==> test mae = 0.224777
	Seed 5 ==> test mae = 0.225236
	Seed 6 ==> test mae = 0.225272
	Seed 7 ==> test mae = 0.224802
	Seed 8 ==> test mae = 0.225228
	Seed 9 ==> test mae = 0.225747
Overall test mae = 0.225413 +/- 0.000442
Elapsed time = 5:15:33
